# Code common to build tools import copy import pathlib import sys import textwrap from numpy.distutils.misc_util import mingw32 #------------------- # Versioning support #------------------- # How to change C_API_VERSION ? # - increase C_API_VERSION value # - record the hash for the new C API with the cversions.py script # and add the hash to cversions.txt # The hash values are used to remind developers when the C API number was not # updated - generates a MismatchCAPIWarning warning which is turned into an # exception for released version. # Binary compatibility version number. This number is increased whenever the # C-API is changed such that binary compatibility is broken, i.e. whenever a # recompile of extension modules is needed. C_ABI_VERSION = 0x01000009 # Minor API version. This number is increased whenever a change is made to the # C-API -- whether it breaks binary compatibility or not. Some changes, such # as adding a function pointer to the end of the function table, can be made # without breaking binary compatibility. In this case, only the C_API_VERSION # (*not* C_ABI_VERSION) would be increased. Whenever binary compatibility is # broken, both C_API_VERSION and C_ABI_VERSION should be increased. # # The version needs to be kept in sync with that in cversions.txt. # # 0x00000008 - 1.7.x # 0x00000009 - 1.8.x # 0x00000009 - 1.9.x # 0x0000000a - 1.10.x # 0x0000000a - 1.11.x # 0x0000000a - 1.12.x # 0x0000000b - 1.13.x # 0x0000000c - 1.14.x # 0x0000000c - 1.15.x # 0x0000000d - 1.16.x # 0x0000000d - 1.19.x # 0x0000000e - 1.20.x # 0x0000000e - 1.21.x # 0x0000000f - 1.22.x # 0x00000010 - 1.23.x # 0x00000010 - 1.24.x C_API_VERSION = 0x00000010 class MismatchCAPIError(ValueError): pass def get_api_versions(apiversion, codegen_dir): """ Return current C API checksum and the recorded checksum. Return current C API checksum and the recorded checksum for the given version of the C API version. """ # Compute the hash of the current API as defined in the .txt files in # code_generators sys.path.insert(0, codegen_dir) try: m = __import__('genapi') numpy_api = __import__('numpy_api') curapi_hash = m.fullapi_hash(numpy_api.full_api) apis_hash = m.get_versions_hash() finally: del sys.path[0] return curapi_hash, apis_hash[apiversion] def check_api_version(apiversion, codegen_dir): """Emits a MismatchCAPIWarning if the C API version needs updating.""" curapi_hash, api_hash = get_api_versions(apiversion, codegen_dir) # If different hash, it means that the api .txt files in # codegen_dir have been updated without the API version being # updated. Any modification in those .txt files should be reflected # in the api and eventually abi versions. # To compute the checksum of the current API, use numpy/core/cversions.py if not curapi_hash == api_hash: msg = ("API mismatch detected, the C API version " "numbers have to be updated. Current C api version is " f"{apiversion}, with checksum {curapi_hash}, but recorded " f"checksum in core/codegen_dir/cversions.txt is {api_hash}. If " "functions were added in the C API, you have to update " f"C_API_VERSION in {__file__}." ) raise MismatchCAPIError(msg) FUNC_CALL_ARGS = {} def set_sig(sig): prefix, _, args = sig.partition("(") args = args.rpartition(")")[0] funcname = prefix.rpartition(" ")[-1] args = [arg.strip() for arg in args.split(",")] # We use {0} because 0 alone cannot be cast to complex on MSVC in C: FUNC_CALL_ARGS[funcname] = ", ".join("(%s){0}" % arg for arg in args) for file in [ "feature_detection_locale.h", "feature_detection_math.h", "feature_detection_cmath.h", "feature_detection_misc.h", "feature_detection_stdio.h", ]: with open(pathlib.Path(__file__).parent / file) as f: for line in f: if line.startswith("#"): continue if not line.strip(): continue set_sig(line) # Mandatory functions: if not found, fail the build # Some of these can still be blocklisted if the C99 implementation # is buggy, see numpy/core/src/common/npy_config.h MANDATORY_FUNCS = [ "sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", "floor", "ceil", "sqrt", "log10", "log", "exp", "asin", "acos", "atan", "fmod", 'modf', 'frexp', 'ldexp', "expm1", "log1p", "acosh", "asinh", "atanh", "rint", "trunc", "exp2", "copysign", "nextafter", "strtoll", "strtoull", "cbrt", "log2", "pow", "hypot", "atan2", "creal", "cimag", "conj" ] OPTIONAL_LOCALE_FUNCS = ["strtold_l"] OPTIONAL_FILE_FUNCS = ["ftello", "fseeko", "fallocate"] OPTIONAL_MISC_FUNCS = ["backtrace", "madvise"] # variable attributes tested via "int %s a" % attribute OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"] # Subset of OPTIONAL_*_FUNCS which may already have HAVE_* defined by Python.h OPTIONAL_FUNCS_MAYBE = [ "ftello", "fseeko" ] C99_COMPLEX_TYPES = [ 'complex double', 'complex float', 'complex long double' ] C99_COMPLEX_FUNCS = [ "cabs", "cacos", "cacosh", "carg", "casin", "casinh", "catan", "catanh", "cexp", "clog", "cpow", "csqrt", # The long double variants (like csinl) should be mandatory on C11, # but are missing in FreeBSD. Issue gh-22850 "csin", "csinh", "ccos", "ccosh", "ctan", "ctanh", ] OPTIONAL_HEADERS = [ # sse headers only enabled automatically on amd64/x32 builds "xmmintrin.h", # SSE "emmintrin.h", # SSE2 "immintrin.h", # AVX "features.h", # for glibc version linux "xlocale.h", # see GH#8367 "dlfcn.h", # dladdr "execinfo.h", # backtrace "libunwind.h", # backtrace for LLVM/Clang using libunwind "sys/mman.h", #madvise ] # optional gcc compiler builtins and their call arguments and optional a # required header and definition name (HAVE_ prepended) # call arguments are required as the compiler will do strict signature checking OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), ("__builtin_isinf", '5.'), ("__builtin_isfinite", '5.'), ("__builtin_bswap32", '5u'), ("__builtin_bswap64", '5u'), ("__builtin_expect", '5, 0'), # Test `long long` for arm+clang 13 (gh-22811, # but we use all versions of __builtin_mul_overflow): ("__builtin_mul_overflow", '(long long)5, 5, (int*)5'), # MMX only needed for icc, but some clangs don't have it ("_m_from_int64", '0', "emmintrin.h"), ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE ("_mm_prefetch", '(float*)0, _MM_HINT_NTA', "xmmintrin.h"), # SSE ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 ("__builtin_prefetch", "(float*)0, 0, 3"), # check that the linker can handle avx ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"', "stdio.h", "LINK_AVX"), ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"', "stdio.h", "LINK_AVX2"), ("__asm__ volatile", '"vpaddd %zmm1, %zmm2, %zmm3"', "stdio.h", "LINK_AVX512F"), ("__asm__ volatile", '"vfpclasspd $0x40, %zmm15, %k6\\n"\ "vmovdqu8 %xmm0, %xmm1\\n"\ "vpbroadcastmb2q %k0, %xmm0\\n"', "stdio.h", "LINK_AVX512_SKX"), ("__asm__ volatile", '"xgetbv"', "stdio.h", "XGETBV"), ] # function attributes # tested via "int %s %s(void *);" % (attribute, name) # function name will be converted to HAVE_ preprocessor macro OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', 'attribute_optimize_unroll_loops'), ('__attribute__((optimize("O3")))', 'attribute_optimize_opt_3'), ('__attribute__((optimize("O2")))', 'attribute_optimize_opt_2'), ('__attribute__((nonnull (1)))', 'attribute_nonnull'), ] OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))', 'attribute_target_avx'), ('__attribute__((target ("avx2")))', 'attribute_target_avx2'), ('__attribute__((target ("avx512f")))', 'attribute_target_avx512f'), ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', 'attribute_target_avx512_skx'), ] # function attributes with intrinsics # To ensure your compiler can compile avx intrinsics with just the attributes # gcc 4.8.4 support attributes but not with intrisics # tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code) # function name will be converted to HAVE_ preprocessor macro # The _mm512_castps_si512 instruction is specific check for AVX-512F support # in gcc-4.9 which is missing a subset of intrinsics. See # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [ ('__attribute__((target("avx2,fma")))', 'attribute_target_avx2_with_intrinsics', '__m256 temp = _mm256_set1_ps(1.0); temp = \ _mm256_fmadd_ps(temp, temp, temp)', 'immintrin.h'), ('__attribute__((target("avx512f")))', 'attribute_target_avx512f_with_intrinsics', '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', 'immintrin.h'), ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', 'attribute_target_avx512_skx_with_intrinsics', '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ __m512i unused_temp = \ _mm512_castps_si512(_mm512_set1_ps(1.0));\ _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', 'immintrin.h'), ] def fname2def(name): return "HAVE_%s" % name.upper() def sym2def(symbol): define = symbol.replace(' ', '') return define.upper() def type2def(symbol): define = symbol.replace(' ', '_') return define.upper() # Code to detect long double representation taken from MPFR m4 macro def check_long_double_representation(cmd): cmd._check_compiler() body = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'} # Disable whole program optimization (the default on vs2015, with python 3.5+) # which generates intermediary object files and prevents checking the # float representation. if sys.platform == "win32" and not mingw32(): try: cmd.compiler.compile_options.remove("/GL") except (AttributeError, ValueError): pass # Disable multi-file interprocedural optimization in the Intel compiler on Linux # which generates intermediary object files and prevents checking the # float representation. elif (sys.platform != "win32" and cmd.compiler.compiler_type.startswith('intel') and '-ipo' in cmd.compiler.cc_exe): newcompiler = cmd.compiler.cc_exe.replace(' -ipo', '') cmd.compiler.set_executables( compiler=newcompiler, compiler_so=newcompiler, compiler_cxx=newcompiler, linker_exe=newcompiler, linker_so=newcompiler + ' -shared' ) # We need to use _compile because we need the object filename src, obj = cmd._compile(body, None, None, 'c') try: ltype = long_double_representation(pyod(obj)) return ltype except ValueError: # try linking to support CC="gcc -flto" or icc -ipo # struct needs to be volatile so it isn't optimized away # additionally "clang -flto" requires the foo struct to be used body = body.replace('struct', 'volatile struct') body += "int main(void) { return foo.before[0]; }\n" src, obj = cmd._compile(body, None, None, 'c') cmd.temp_files.append("_configtest") cmd.compiler.link_executable([obj], "_configtest") ltype = long_double_representation(pyod("_configtest")) return ltype finally: cmd._clean() LONG_DOUBLE_REPRESENTATION_SRC = r""" /* "before" is 16 bytes to ensure there's no padding between it and "x". * We're not expecting any "long double" bigger than 16 bytes or with * alignment requirements stricter than 16 bytes. */ typedef %(type)s test_type; struct { char before[16]; test_type x; char after[8]; } foo = { { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' }, -123456789.0, { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' } }; """ def pyod(filename): """Python implementation of the od UNIX utility (od -b, more exactly). Parameters ---------- filename : str name of the file to get the dump from. Returns ------- out : seq list of lines of od output Notes ----- We only implement enough to get the necessary information for long double representation, this is not intended as a compatible replacement for od. """ out = [] with open(filename, 'rb') as fid: yo2 = [oct(o)[2:] for o in fid.read()] for i in range(0, len(yo2), 16): line = ['%07d' % int(oct(i)[2:])] line.extend(['%03d' % int(c) for c in yo2[i:i+16]]) out.append(" ".join(line)) return out _BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000', '001', '043', '105', '147', '211', '253', '315', '357'] _AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020'] _IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000'] _IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1] _INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353', '031', '300', '000', '000'] _INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353', '031', '300', '000', '000', '000', '000', '000', '000'] _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171', '242', '240', '000', '000', '000', '000'] _IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', '000', '000', '000', '000', '000', '000', '000', '000'] _IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] _IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + ['000'] * 8) _IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + ['000'] * 8) def long_double_representation(lines): """Given a binary dump as given by GNU od -b, look for long double representation.""" # Read contains a list of 32 items, each item is a byte (in octal # representation, as a string). We 'slide' over the output until read is of # the form before_seq + content + after_sequence, where content is the long double # representation: # - content is 12 bytes: 80 bits Intel representation # - content is 16 bytes: 80 bits Intel representation (64 bits) or quad precision # - content is 8 bytes: same as double (not implemented yet) read = [''] * 32 saw = None for line in lines: # we skip the first word, as od -b output an index at the beginning of # each line for w in line.split()[1:]: read.pop(0) read.append(w) # If the end of read is equal to the after_sequence, read contains # the long double if read[-8:] == _AFTER_SEQ: saw = copy.copy(read) # if the content was 12 bytes, we only have 32 - 8 - 12 = 12 # "before" bytes. In other words the first 4 "before" bytes went # past the sliding window. if read[:12] == _BEFORE_SEQ[4:]: if read[12:-8] == _INTEL_EXTENDED_12B: return 'INTEL_EXTENDED_12_BYTES_LE' if read[12:-8] == _MOTOROLA_EXTENDED_12B: return 'MOTOROLA_EXTENDED_12_BYTES_BE' # if the content was 16 bytes, we are left with 32-8-16 = 16 # "before" bytes, so 8 went past the sliding window. elif read[:8] == _BEFORE_SEQ[8:]: if read[8:-8] == _INTEL_EXTENDED_16B: return 'INTEL_EXTENDED_16_BYTES_LE' elif read[8:-8] == _IEEE_QUAD_PREC_BE: return 'IEEE_QUAD_BE' elif read[8:-8] == _IEEE_QUAD_PREC_LE: return 'IEEE_QUAD_LE' elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE: return 'IBM_DOUBLE_DOUBLE_LE' elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE: return 'IBM_DOUBLE_DOUBLE_BE' # if the content was 8 bytes, left with 32-8-8 = 16 bytes elif read[:16] == _BEFORE_SEQ: if read[16:-8] == _IEEE_DOUBLE_LE: return 'IEEE_DOUBLE_LE' elif read[16:-8] == _IEEE_DOUBLE_BE: return 'IEEE_DOUBLE_BE' if saw is not None: raise ValueError("Unrecognized format (%s)" % saw) else: # We never detected the after_sequence raise ValueError("Could not lock sequences (%s)" % saw) def check_for_right_shift_internal_compiler_error(cmd): """ On our arm CI, this fails with an internal compilation error The failure looks like the following, and can be reproduced on ARM64 GCC 5.4: : In function 'right_shift': :4:20: internal compiler error: in expand_shift_1, at expmed.c:2349 ip1[i] = ip1[i] >> in2; ^ Please submit a full bug report, with preprocessed source if appropriate. See for instructions. Compiler returned: 1 This function returns True if this compiler bug is present, and we need to turn off optimization for the function """ cmd._check_compiler() has_optimize = cmd.try_compile(textwrap.dedent("""\ __attribute__((optimize("O3"))) void right_shift() {} """), None, None) if not has_optimize: return False no_err = cmd.try_compile(textwrap.dedent("""\ typedef long the_type; /* fails also for unsigned and long long */ __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) { for (int i = 0; i < n; i++) { if (in2 < (the_type)sizeof(the_type) * 8) { ip1[i] = ip1[i] >> in2; } } } """), None, None) return not no_err