|
numpy
2.0.0
|
#include <stdlib.h>#include <stdio.h>#include <assert.h>#include <Python.h>#include "numpy/ndarraytypes.h"#include "mem_overlap.h"#include "npy_extint128.h"Defines | |
| #define | NPY_NO_DEPRECATED_API NPY_API_VERSION |
| #define | MAX(a, b) (((a) >= (b)) ? (a) : (b)) |
| #define | MIN(a, b) (((a) <= (b)) ? (a) : (b)) |
Functions | |
| static void | euclid (npy_int64 a1, npy_int64 a2, npy_int64 *a_gcd, npy_int64 *gamma, npy_int64 *epsilon) |
| static int | diophantine_precompute (unsigned int n, diophantine_term_t *E, diophantine_term_t *Ep, npy_int64 *Gamma, npy_int64 *Epsilon) |
| static mem_overlap_t | diophantine_dfs (unsigned int n, unsigned int v, diophantine_term_t *E, diophantine_term_t *Ep, npy_int64 *Gamma, npy_int64 *Epsilon, npy_int64 b, Py_ssize_t max_work, int require_ub_nontrivial, npy_int64 *x, Py_ssize_t *count) |
| NPY_VISIBILITY_HIDDEN mem_overlap_t | solve_diophantine (unsigned int n, diophantine_term_t *E, npy_int64 b, Py_ssize_t max_work, int require_ub_nontrivial, npy_int64 *x) |
| static int | diophantine_sort_A (const void *xp, const void *yp) |
| NPY_VISIBILITY_HIDDEN int | diophantine_simplify (unsigned int *n, diophantine_term_t *E, npy_int64 b) |
| NPY_VISIBILITY_HIDDEN void | offset_bounds_from_strides (const int itemsize, const int nd, const npy_intp *dims, const npy_intp *strides, npy_intp *lower_offset, npy_intp *upper_offset) |
| static void | get_array_memory_extents (PyArrayObject *arr, npy_uintp *out_start, npy_uintp *out_end, npy_uintp *num_bytes) |
| static int | strides_to_terms (PyArrayObject *arr, diophantine_term_t *terms, unsigned int *nterms, int skip_empty) |
| NPY_VISIBILITY_HIDDEN mem_overlap_t | solve_may_share_memory (PyArrayObject *a, PyArrayObject *b, Py_ssize_t max_work) |
| NPY_VISIBILITY_HIDDEN mem_overlap_t | solve_may_have_internal_overlap (PyArrayObject *a, Py_ssize_t max_work) |
| #define MAX | ( | a, | |
| b | |||
| ) | (((a) >= (b)) ? (a) : (b)) |
| #define MIN | ( | a, | |
| b | |||
| ) | (((a) <= (b)) ? (a) : (b)) |
Referenced by diophantine_sort_A().
| #define NPY_NO_DEPRECATED_API NPY_API_VERSION |
| static mem_overlap_t diophantine_dfs | ( | unsigned int | n, |
| unsigned int | v, | ||
| diophantine_term_t * | E, | ||
| diophantine_term_t * | Ep, | ||
| npy_int64 * | Gamma, | ||
| npy_int64 * | Epsilon, | ||
| npy_int64 | b, | ||
| Py_ssize_t | max_work, | ||
| int | require_ub_nontrivial, | ||
| npy_int64 * | x, | ||
| Py_ssize_t * | count | ||
| ) | [static] |
References MEM_OVERLAP_NO.
Referenced by solve_diophantine().
| static int diophantine_precompute | ( | unsigned int | n, |
| diophantine_term_t * | E, | ||
| diophantine_term_t * | Ep, | ||
| npy_int64 * | Gamma, | ||
| npy_int64 * | Epsilon | ||
| ) | [static] |
References diophantine_term_t::a, safe_add(), safe_mul(), and diophantine_term_t::ub.
Referenced by solve_diophantine().
| NPY_VISIBILITY_HIDDEN int diophantine_simplify | ( | unsigned int * | n, |
| diophantine_term_t * | E, | ||
| npy_int64 | b | ||
| ) |
| static int diophantine_sort_A | ( | const void * | xp, |
| const void * | yp | ||
| ) | [static] |
References MIN, and diophantine_term_t::ub.
| static void euclid | ( | npy_int64 | a1, |
| npy_int64 | a2, | ||
| npy_int64 * | a_gcd, | ||
| npy_int64 * | gamma, | ||
| npy_int64 * | epsilon | ||
| ) | [static] |
| static void get_array_memory_extents | ( | PyArrayObject * | arr, |
| npy_uintp * | out_start, | ||
| npy_uintp * | out_end, | ||
| npy_uintp * | num_bytes | ||
| ) | [static] |
| NPY_VISIBILITY_HIDDEN void offset_bounds_from_strides | ( | const int | itemsize, |
| const int | nd, | ||
| const npy_intp * | dims, | ||
| const npy_intp * | strides, | ||
| npy_intp * | lower_offset, | ||
| npy_intp * | upper_offset | ||
| ) |
| NPY_VISIBILITY_HIDDEN mem_overlap_t solve_diophantine | ( | unsigned int | n, |
| diophantine_term_t * | E, | ||
| npy_int64 | b, | ||
| Py_ssize_t | max_work, | ||
| int | require_ub_nontrivial, | ||
| npy_int64 * | x | ||
| ) |
A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b 0 <= x[i] <= U[i] A[i] > 0
References diophantine_dfs(), diophantine_precompute(), MEM_OVERLAP_ERROR, and MEM_OVERLAP_OVERFLOW.
| NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_have_internal_overlap | ( | PyArrayObject * | a, |
| Py_ssize_t | max_work | ||
| ) |
solutions to <blockquote> sum(a*x) = b, 0 <= x[i] <= ub[i]</blockquote>
for any b. Equivalently, <blockquote> sum(a*x0) - sum(a*x1) = 0</blockquote>
Mapping the coefficients on the left by x0'[i] = x0[i] if a[i] > 0 else ub[i]-x0[i] and opposite for x1, we have <blockquote> sum(abs(a)*(x0' + x1')) = sum(abs(a)*ub)</blockquote>
Now, x0!=x1 if for some i we have x0'[i] + x1'[i] != ub[i]. We can now change variables to z[i] = x0'[i] + x1'[i] so the problem becomes <blockquote> sum(abs(a)*z) = sum(abs(a)*ub), 0 <= z[i] <= 2*ub[i], z != ub</blockquote>
This can be solved with solve_diophantine.
| NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_share_memory | ( | PyArrayObject * | a, |
| PyArrayObject * | b, | ||
| Py_ssize_t | max_work | ||
| ) |
The bounds computed by offset_bounds_from_strides correspond to all-positive strides.
start1 + sum(abs(stride1)*x1) == start2 + sum(abs(stride2)*x2) == end1 - 1 - sum(abs(stride1)*x1') == end2 - 1 - sum(abs(stride2)*x2')
<=>
sum(abs(stride1)*x1) + sum(abs(stride2)*x2') == end2 - 1 - start1
OR
sum(abs(stride1)*x1') + sum(abs(stride2)*x2) == end1 - 1 - start2
We pick the problem with the smaller RHS (they are non-negative due to the extent check above.)
References MEM_OVERLAP_NO.
Referenced by raw_array_is_aligned().
| static int strides_to_terms | ( | PyArrayObject * | arr, |
| diophantine_term_t * | terms, | ||
| unsigned int * | nterms, | ||
| int | skip_empty | ||
| ) | [static] |