libflame revision_anchor
Functions
blis_prototypes_level1.h File Reference

(r)

Go to the source code of this file.

Functions

void bl1_samax (int n, float *x, int incx, int *index)
 
void bl1_damax (int n, double *x, int incx, int *index)
 
void bl1_camax (int n, scomplex *x, int incx, int *index)
 
void bl1_zamax (int n, dcomplex *x, int incx, int *index)
 
void bl1_sasum (int n, float *x, int incx, float *norm)
 
void bl1_dasum (int n, double *x, int incx, double *norm)
 
void bl1_casum (int n, scomplex *x, int incx, float *norm)
 
void bl1_zasum (int n, dcomplex *x, int incx, double *norm)
 
void bl1_saxpy (int n, float *alpha, float *x, int incx, float *y, int incy)
 
void bl1_daxpy (int n, double *alpha, double *x, int incx, double *y, int incy)
 
void bl1_caxpy (int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zaxpy (int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_saxpyv (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
 
void bl1_daxpyv (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
 
void bl1_caxpyv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zaxpyv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_saxpymt (trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_daxpymt (trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_caxpymt (trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpymt (trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_saxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_daxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_caxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_saxpysv (int n, float *alpha0, float *alpha1, float *x, int incx, float *beta, float *y, int incy)
 
void bl1_daxpysv (int n, double *alpha0, double *alpha1, double *x, int incx, double *beta, double *y, int incy)
 
void bl1_caxpysv (int n, scomplex *alpha0, scomplex *alpha1, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
 
void bl1_zaxpysv (int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
 
void bl1_saxpysmt (trans1_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs)
 
void bl1_daxpysmt (trans1_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs)
 
void bl1_caxpysmt (trans1_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpysmt (trans1_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sconjv (int m, float *x, int incx)
 
void bl1_dconjv (int m, double *x, int incx)
 
void bl1_cconjv (int m, scomplex *x, int incx)
 
void bl1_zconjv (int m, dcomplex *x, int incx)
 
void bl1_sconjm (int m, int n, float *a, int a_rs, int a_cs)
 
void bl1_dconjm (int m, int n, double *a, int a_rs, int a_cs)
 
void bl1_cconjm (int m, int n, scomplex *a, int a_rs, int a_cs)
 
void bl1_zconjm (int m, int n, dcomplex *a, int a_rs, int a_cs)
 
void bl1_sconjmr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs)
 
void bl1_dconjmr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs)
 
void bl1_cconjmr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
 
void bl1_zconjmr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
 
void bl1_scopy (int m, float *x, int incx, float *y, int incy)
 
void bl1_dcopy (int m, double *x, int incx, double *y, int incy)
 
void bl1_ccopy (int m, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zcopy (int m, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_icopyv (conj1_t conj, int m, int *x, int incx, int *y, int incy)
 
void bl1_scopyv (conj1_t conj, int m, float *x, int incx, float *y, int incy)
 
void bl1_dcopyv (conj1_t conj, int m, double *x, int incx, double *y, int incy)
 
void bl1_ccopyv (conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zcopyv (conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_sdcopyv (conj1_t conj, int m, float *x, int incx, double *y, int incy)
 
void bl1_dscopyv (conj1_t conj, int m, double *x, int incx, float *y, int incy)
 
void bl1_sccopyv (conj1_t conj, int m, float *x, int incx, scomplex *y, int incy)
 
void bl1_cscopyv (conj1_t conj, int m, scomplex *x, int incx, float *y, int incy)
 
void bl1_szcopyv (conj1_t conj, int m, float *x, int incx, dcomplex *y, int incy)
 
void bl1_zscopyv (conj1_t conj, int m, dcomplex *x, int incx, float *y, int incy)
 
void bl1_dccopyv (conj1_t conj, int m, double *x, int incx, scomplex *y, int incy)
 
void bl1_cdcopyv (conj1_t conj, int m, scomplex *x, int incx, double *y, int incy)
 
void bl1_dzcopyv (conj1_t conj, int m, double *x, int incx, dcomplex *y, int incy)
 
void bl1_zdcopyv (conj1_t conj, int m, dcomplex *x, int incx, double *y, int incy)
 
void bl1_czcopyv (conj1_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_zccopyv (conj1_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy)
 
void bl1_scopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sscopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sdcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dscopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sccopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cscopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_szcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zscopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_ddcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dccopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cdcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dzcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zdcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_czcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zccopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zzcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_scopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_sccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_szcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_dscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_ddcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_dzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_cscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_cdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_czcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_zdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_zccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_icopymt (trans1_t trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs)
 
void bl1_scopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sscopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sdcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dscopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sccopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cscopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_szcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zscopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_ddcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dccopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cdcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dzcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zdcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_czcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zccopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zzcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_cdot_in (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
 
void bl1_zdot_in (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
 
void bl1_sdot (conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
 
void bl1_ddot (conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
 
void bl1_cdot (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
 
void bl1_zdot (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
 
void bl1_sdots (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
 
void bl1_ddots (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
 
void bl1_cdots (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
 
void bl1_zdots (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
 
void bl1_sdot2s (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
 
void bl1_ddot2s (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
 
void bl1_cdot2s (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
 
void bl1_zdot2s (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
 
void bl1_sfnorm (int m, int n, float *a, int a_rs, int a_cs, float *norm)
 
void bl1_dfnorm (int m, int n, double *a, int a_rs, int a_cs, double *norm)
 
void bl1_cfnorm (int m, int n, scomplex *a, int a_rs, int a_cs, float *norm)
 
void bl1_zfnorm (int m, int n, dcomplex *a, int a_rs, int a_cs, double *norm)
 
void bl1_sinvscalv (conj1_t conj, int n, float *alpha, float *x, int incx)
 
void bl1_dinvscalv (conj1_t conj, int n, double *alpha, double *x, int incx)
 
void bl1_csinvscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx)
 
void bl1_cinvscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
 
void bl1_zdinvscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
 
void bl1_zinvscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
 
void bl1_sinvscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
 
void bl1_dinvscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
 
void bl1_csinvscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_cinvscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_zdinvscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_zinvscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_snrm2 (int n, float *x, int incx, float *norm)
 
void bl1_dnrm2 (int n, double *x, int incx, double *norm)
 
void bl1_cnrm2 (int n, scomplex *x, int incx, float *norm)
 
void bl1_znrm2 (int n, dcomplex *x, int incx, double *norm)
 
void bl1_sscal (int n, float *alpha, float *x, int incx)
 
void bl1_dscal (int n, double *alpha, double *x, int incx)
 
void bl1_csscal (int n, float *alpha, scomplex *x, int incx)
 
void bl1_cscal (int n, scomplex *alpha, scomplex *x, int incx)
 
void bl1_zdscal (int n, double *alpha, dcomplex *x, int incx)
 
void bl1_zscal (int n, dcomplex *alpha, dcomplex *x, int incx)
 
void bl1_sscalv (conj1_t conj, int n, float *alpha, float *x, int incx)
 
void bl1_dscalv (conj1_t conj, int n, double *alpha, double *x, int incx)
 
void bl1_csscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx)
 
void bl1_cscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
 
void bl1_zdscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
 
void bl1_zscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
 
void bl1_sscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
 
void bl1_dscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
 
void bl1_csscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_cscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_zdscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_zscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_sscalmr (uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
 
void bl1_dscalmr (uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
 
void bl1_csscalmr (uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_cscalmr (uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_zdscalmr (uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_zscalmr (uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_sswap (int n, float *x, int incx, float *y, int incy)
 
void bl1_dswap (int n, double *x, int incx, double *y, int incy)
 
void bl1_cswap (int n, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zswap (int n, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_sswapv (int n, float *x, int incx, float *y, int incy)
 
void bl1_dswapv (int n, double *x, int incx, double *y, int incy)
 
void bl1_cswapv (int n, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zswapv (int n, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_sswapmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dswapmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cswapmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zswapmt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 

Function Documentation

◆ bl1_camax()

void bl1_camax ( int  n,
scomplex * x,
int  incx,
int * index 
)
36{
37#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
38 *index = cblas_icamax( n,
39 x, incx );
40#else
41 *index = F77_icamax( &n,
42 x, &incx ) - 1;
43#endif
44}
int i
Definition bl1_axmyv2.c:145
int F77_icamax(int *n, scomplex *x, int *incx)
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX)

References cblas_icamax(), and F77_icamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), and FLA_SA_LU_unb().

◆ bl1_casum()

void bl1_casum ( int  n,
scomplex * x,
int  incx,
float * norm 
)
36{
37#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
38 *norm = cblas_scasum( n,
39 x, incx );
40#else
41 *norm = F77_scasum( &n,
42 x, &incx );
43#endif
44}
float F77_scasum(int *n, scomplex *x, int *incx)
float cblas_scasum(const int N, const void *X, const int incX)

References cblas_scasum(), and F77_scasum().

Referenced by FLA_Asum_external().

◆ bl1_caxpy()

void bl1_caxpy ( int  n,
scomplex * alpha,
scomplex * x,
int  incx,
scomplex * y,
int  incy 
)
44{
45#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
46 cblas_caxpy( n,
47 alpha,
48 x, incx,
49 y, incy );
50#else
51 F77_caxpy( &n,
52 alpha,
53 x, &incx,
54 y, &incy );
55#endif
56}
void F77_caxpy(int *n, scomplex *alpha, scomplex *x, int *incx, scomplex *y, int *incy)
void cblas_caxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY)

References cblas_caxpy(), and F77_caxpy().

Referenced by bl1_caxpymt(), bl1_caxpysmt(), bl1_caxpysv(), and bl1_caxpyv().

◆ bl1_caxpymrt()

void bl1_caxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
228{
231 int lda, inca;
232 int ldb, incb;
233 int n_iter;
234 int n_elem;
235 int n_elem_max;
237 int j;
239
240 // Return early if possible.
241 if ( bl1_zero_dim2( m, n ) ) return;
242
243 // Initialize variables based on storage format of B and value of uplo.
244 if ( bl1_is_col_storage( b_rs, b_cs ) )
245 {
246 if ( bl1_is_lower( uplo ) )
247 {
248 n_iter = bl1_min( m, n );
249 n_elem_max = m;
250 lda = a_cs;
251 inca = a_rs;
252 ldb = b_cs;
253 incb = b_rs;
255 }
256 else // if ( bl1_is_upper( uplo ) )
257 {
258 n_iter = n;
259 n_elem_max = bl1_min( m, n );
260 lda = a_cs;
261 inca = a_rs;
262 ldb = b_cs;
263 incb = b_rs;
265 }
266 }
267 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
268 {
269 if ( bl1_is_lower( uplo ) )
270 {
271 n_iter = m;
272 n_elem_max = bl1_min( m, n );
273 lda = a_rs;
274 inca = a_cs;
275 ldb = b_rs;
276 incb = b_cs;
278 }
279 else // if ( bl1_is_upper( uplo ) )
280 {
281 n_iter = bl1_min( m, n );
282 n_elem_max = n;
283 lda = a_rs;
284 inca = a_cs;
285 ldb = b_rs;
286 incb = b_cs;
288 }
289 }
290
291 // Swap lda and inca if we're doing a transpose.
292 if ( bl1_does_trans( trans ) )
293 {
295 }
296
297 // Extract conj component from trans parameter.
299
300 // Choose the loop based on whether n_elem will be shrinking or growing
301 // with each iteration.
303 {
304 for ( j = 0; j < n_iter; j++ )
305 {
306 n_elem = n_elem_max - j;
307 a_begin = a + j*lda + j*inca;
308 b_begin = b + j*ldb + j*incb;
309
311 n_elem,
312 alpha,
313 a_begin, inca,
314 b_begin, incb );
315 }
316 }
317 else // if ( n_elem_is_ascending )
318 {
319 for ( j = 0; j < n_iter; j++ )
320 {
321 n_elem = bl1_min( j + 1, n_elem_max );
322 a_begin = a + j*lda;
323 b_begin = b + j*ldb;
324
326 n_elem,
327 alpha,
328 a_begin, inca,
329 b_begin, incb );
330 }
331 }
332}
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
int bl1_is_lower(uplo1_t uplo)
Definition bl1_is.c:49
int bl1_is_col_storage(int rs, int cs)
Definition bl1_is.c:90
conj1_t bl1_proj_trans1_to_conj(trans1_t trans)
Definition bl1_proj.c:13
int bl1_zero_dim2(int m, int n)
Definition bl1_is.c:118
int bl1_does_trans(trans1_t trans)
Definition bl1_does.c:13
conj1_t
Definition blis_type_defs.h:80
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Axpyrt_external().

◆ bl1_caxpymt()

void bl1_caxpymt ( trans1_t  trans,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
150{
154 int inca_temp;
155 int lda, inca;
156 int ldb, incb;
157 int n_iter;
158 int n_elem;
159 int j;
160
161 // Return early if possible.
162 if ( bl1_zero_dim2( m, n ) ) return;
163
164 // Handle cases where A and B are vectors to ensure that the underlying axpy
165 // gets invoked only once.
166 if ( bl1_is_vector( m, n ) )
167 {
168 // Initialize with values appropriate for vectors.
169 n_iter = 1;
170 n_elem = bl1_vector_dim( m, n );
171 lda = 1; // multiplied by zero when n_iter == 1; not needed.
172 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
173 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
175 }
176 else // matrix case
177 {
178 // Initialize with optimal values for column-major storage.
179 n_iter = n;
180 n_elem = m;
181 lda = a_cs;
182 inca = a_rs;
183 ldb = b_cs;
184 incb = b_rs;
185
186 // Handle the transposition of A.
187 if ( bl1_does_trans( trans ) )
188 {
190 }
191
192 // An optimization: if B is row-major and if A is effectively row-major
193 // after a possible transposition, then let's access the matrices by rows
194 // instead of by columns for increased spatial locality.
195 if ( bl1_is_row_storage( b_rs, b_cs ) )
196 {
197 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
199 {
203 }
204 }
205 }
206
207 if ( bl1_does_conj( trans ) )
208 {
210
212 inca_temp = 1;
213
214 for ( j = 0; j < n_iter; j++ )
215 {
216 a_begin = a + j*lda;
217 b_begin = b + j*ldb;
218
220 n_elem,
221 a_begin, inca,
222 a_temp, inca_temp );
223
225 alpha,
227 b_begin, incb );
228 }
229
230 bl1_cfree( a_temp );
231 }
232 else // if ( !bl1_does_conj( trans ) )
233 {
234 for ( j = 0; j < n_iter; j++ )
235 {
236 a_begin = a + j*lda;
237 b_begin = b + j*ldb;
238
240 alpha,
241 a_begin, inca,
242 b_begin, incb );
243 }
244
245 }
246}
void bl1_caxpy(int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpy.c:43
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
int bl1_does_notrans(trans1_t trans)
Definition bl1_does.c:19
int bl1_does_conj(trans1_t trans)
Definition bl1_does.c:25
int bl1_is_row_storage(int rs, int cs)
Definition bl1_is.c:95
int bl1_is_vector(int m, int n)
Definition bl1_is.c:106
int bl1_vector_dim(int m, int n)
Definition bl1_vector.c:13
int bl1_vector_inc(trans1_t trans, int m, int n, int rs, int cs)
Definition bl1_vector.c:19
void bl1_cfree(scomplex *p)
Definition bl1_free.c:40
scomplex * bl1_callocv(unsigned int n_elem)
Definition bl1_allocv.c:40
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_caxpysmt()

void bl1_caxpysmt ( trans1_t  trans,
int  m,
int  n,
scomplex * alpha0,
scomplex * alpha1,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * beta,
scomplex * b,
int  b_rs,
int  b_cs 
)
164{
169 int inca_temp;
170 int lda, inca;
171 int ldb, incb;
172 int n_iter;
173 int n_elem;
174 int j;
175
176 // Return early if possible.
177 if ( bl1_zero_dim2( m, n ) ) return;
178
179 alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
180 alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
181
182 // Handle cases where A and B are vectors to ensure that the underlying axpy
183 // gets invoked only once.
184 if ( bl1_is_vector( m, n ) )
185 {
186 // Initialize with values appropriate for vectors.
187 n_iter = 1;
188 n_elem = bl1_vector_dim( m, n );
189 lda = 1; // multiplied by zero when n_iter == 1; not needed.
190 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
191 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
193 }
194 else // matrix case
195 {
196 // Initialize with optimal values for column-major storage.
197 n_iter = n;
198 n_elem = m;
199 lda = a_cs;
200 inca = a_rs;
201 ldb = b_cs;
202 incb = b_rs;
203
204 // Handle the transposition of A.
205 if ( bl1_does_trans( trans ) )
206 {
208 }
209
210 // An optimization: if B is row-major and if A is effectively row-major
211 // after a possible transposition, then let's access the matrices by rows
212 // instead of by columns for increased spatial locality.
213 if ( bl1_is_row_storage( b_rs, b_cs ) )
214 {
215 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
217 {
221 }
222 }
223 }
224
225 if ( bl1_does_conj( trans ) )
226 {
228
230 inca_temp = 1;
231
232 for ( j = 0; j < n_iter; j++ )
233 {
234 a_begin = a + j*lda;
235 b_begin = b + j*ldb;
236
238 n_elem,
239 a_begin, inca,
240 a_temp, inca_temp );
241
243 beta,
244 b_begin, incb );
245
247 &alpha_prod,
249 b_begin, incb );
250 }
251
252 bl1_cfree( a_temp );
253 }
254 else // if ( !bl1_does_conj( trans ) )
255 {
256 for ( j = 0; j < n_iter; j++ )
257 {
258 a_begin = a + j*lda;
259 b_begin = b + j*ldb;
260
262 beta,
263 b_begin, incb );
264
266 &alpha_prod,
267 a_begin, inca,
268 b_begin, incb );
269 }
270 }
271}
double *restrict alpha1
Definition bl1_axpyv2bdotaxpy.c:198
void bl1_cscal(int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scal.c:52
float real
Definition blis_type_defs.h:134

References alpha1, bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_cscal(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Axpys_external().

◆ bl1_caxpysv()

void bl1_caxpysv ( int  n,
scomplex * alpha0,
scomplex * alpha1,
scomplex * x,
int  incx,
scomplex * beta,
scomplex * y,
int  incy 
)
52{
53 scomplex alpha_prod;
54
55 // Return early if possible.
56 if ( bl1_zero_dim1( n ) ) return;
57
58 alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
59 alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
60
61 bl1_cscal( n,
62 beta,
63 y, incy );
64
65 bl1_caxpy( n,
66 &alpha_prod,
67 x, incx,
68 y, incy );
69}
int bl1_zero_dim1(int m)
Definition bl1_is.c:113

References alpha1, bl1_caxpy(), bl1_cscal(), bl1_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

◆ bl1_caxpyv()

void bl1_caxpyv ( conj1_t  conj,
int  n,
scomplex * alpha,
scomplex * x,
int  incx,
scomplex * y,
int  incy 
)
30{
31 scomplex* x_copy;
32 int incx_copy;
33
34 // Return early if possible.
35 if ( bl1_zero_dim1( n ) ) return;
36
37 x_copy = x;
38 incx_copy = incx;
39
40 if ( bl1_is_conj( conj ) )
41 {
42 x_copy = bl1_callocv( n );
43 incx_copy = 1;
44
45 bl1_ccopyv( conj,
46 n,
47 x, incx,
48 x_copy, incx_copy );
49 }
50
51 bl1_caxpy( n,
52 alpha,
53 x_copy, incx_copy,
54 y, incy );
55
56 if ( bl1_is_conj( conj ) )
57 bl1_cfree( x_copy );
58}
int bl1_is_conj(conj1_t conj)
Definition bl1_is.c:42

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_is_conj(), and bl1_zero_dim1().

Referenced by bl1_caxpymrt(), bl1_cgemv(), bl1_chemv(), bl1_ctrmvsx(), bl1_ctrsvsx(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().

◆ bl1_cccopymr()

void bl1_cccopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
1051{
1054 int lda, inca;
1055 int ldb, incb;
1056 int n_iter;
1057 int n_elem_max;
1058 int n_elem;
1059 int j;
1060
1061 // Return early if possible.
1062 if ( bl1_zero_dim2( m, n ) ) return;
1063
1064 // We initialize for column-major.
1065 n_iter = n;
1066 n_elem_max = m;
1067 lda = a_cs;
1068 inca = a_rs;
1069 ldb = b_cs;
1070 incb = b_rs;
1071
1072 // An optimization: if B is row-major, then let's access the matrix
1073 // by rows instead of by columns for increased spatial locality.
1074 if ( bl1_is_row_storage( b_rs, b_cs ) )
1075 {
1079 bl1_toggle_uplo( uplo );
1080 }
1081
1082
1083 if ( bl1_is_upper( uplo ) )
1084 {
1085 for ( j = 0; j < n_iter; j++ )
1086 {
1087 n_elem = bl1_min( j + 1, n_elem_max );
1088 a_begin = a + j*lda;
1089 b_begin = b + j*ldb;
1090
1092 n_elem,
1093 a_begin, inca,
1094 b_begin, incb );
1095 }
1096 }
1097 else // if ( bl1_is_lower( uplo ) )
1098 {
1099 for ( j = 0; j < n_iter; j++ )
1100 {
1101 n_elem = bl1_max( 0, n_elem_max - j );
1102 a_begin = a + j*lda + j*inca;
1103 b_begin = b + j*ldb + j*incb;
1104
1105 if ( n_elem <= 0 ) break;
1106
1108 n_elem,
1109 a_begin, inca,
1110 b_begin, incb );
1111 }
1112 }
1113}
int bl1_is_upper(uplo1_t uplo)
Definition bl1_is.c:54
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81

References bl1_ccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_cccopymrt()

void bl1_cccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
1497{
1500 int lda, inca;
1501 int ldb, incb;
1502 int n_iter;
1503 int n_elem;
1504 int n_elem_max;
1506 int j;
1507 conj1_t conj;
1508
1509 // Return early if possible.
1510 if ( bl1_zero_dim2( m, n ) ) return;
1511
1512 // Initialize variables based on storage format of B and value of uplo.
1513 if ( bl1_is_col_storage( b_rs, b_cs ) )
1514 {
1515 if ( bl1_is_lower( uplo ) )
1516 {
1517 n_iter = bl1_min( m, n );
1518 n_elem_max = m;
1519 lda = a_cs;
1520 inca = a_rs;
1521 ldb = b_cs;
1522 incb = b_rs;
1524 }
1525 else // if ( bl1_is_upper( uplo ) )
1526 {
1527 n_iter = n;
1528 n_elem_max = bl1_min( m, n );
1529 lda = a_cs;
1530 inca = a_rs;
1531 ldb = b_cs;
1532 incb = b_rs;
1534 }
1535 }
1536 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1537 {
1538 if ( bl1_is_lower( uplo ) )
1539 {
1540 n_iter = m;
1541 n_elem_max = bl1_min( m, n );
1542 lda = a_rs;
1543 inca = a_cs;
1544 ldb = b_rs;
1545 incb = b_cs;
1547 }
1548 else // if ( bl1_is_upper( uplo ) )
1549 {
1550 n_iter = bl1_min( m, n );
1551 n_elem_max = n;
1552 lda = a_rs;
1553 inca = a_cs;
1554 ldb = b_rs;
1555 incb = b_cs;
1557 }
1558 }
1559
1560 // Swap lda and inca if we're doing a transpose.
1561 if ( bl1_does_trans( trans ) )
1562 {
1564 }
1565
1566 // Extract conj component from trans parameter.
1568
1569 // Choose the loop based on whether n_elem will be shrinking or growing
1570 // with each iteration.
1572 {
1573 for ( j = 0; j < n_iter; j++ )
1574 {
1575 n_elem = n_elem_max - j;
1576 a_begin = a + j*lda + j*inca;
1577 b_begin = b + j*ldb + j*incb;
1578
1580 n_elem,
1581 a_begin, inca,
1582 b_begin, incb );
1583 }
1584 }
1585 else // if ( n_elem_is_ascending )
1586 {
1587 for ( j = 0; j < n_iter; j++ )
1588 {
1589 n_elem = bl1_min( j + 1, n_elem_max );
1590 a_begin = a + j*lda;
1591 b_begin = b + j*ldb;
1592
1594 n_elem,
1595 a_begin, inca,
1596 b_begin, incb );
1597 }
1598 }
1599}

References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

◆ bl1_cccopymt()

void bl1_cccopymt ( trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
1167{
1170 int lda, inca;
1171 int ldb, incb;
1172 int n_iter;
1173 int n_elem;
1174 int j;
1175 conj1_t conj;
1176
1177 // Return early if possible.
1178 if ( bl1_zero_dim2( m, n ) ) return;
1179
1180 // Handle cases where A and B are vectors to ensure that the underlying copy
1181 // gets invoked only once.
1182 if ( bl1_is_vector( m, n ) )
1183 {
1184 // Initialize with values appropriate for vectors.
1185 n_iter = 1;
1186 n_elem = bl1_vector_dim( m, n );
1187 lda = 1; // multiplied by zero when n_iter == 1; not needed.
1188 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1189 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1191 }
1192 else // matrix case
1193 {
1194 // Initialize with optimal values for column-major storage of B.
1195 n_iter = n;
1196 n_elem = m;
1197 lda = a_cs;
1198 inca = a_rs;
1199 ldb = b_cs;
1200 incb = b_rs;
1201
1202 // Handle the transposition of A.
1203 if ( bl1_does_trans( trans ) )
1204 {
1206 }
1207
1208 // An optimization: if B is row-major, then let's access the matrix by rows
1209 // instead of by columns for increased spatial locality.
1210 if ( bl1_is_row_storage( b_rs, b_cs ) )
1211 {
1215 }
1216 }
1217
1218 // Extract conj component from trans parameter.
1220
1221 for ( j = 0; j < n_iter; ++j )
1222 {
1223 a_begin = a + j*lda;
1224 b_begin = b + j*ldb;
1225
1227 n_elem,
1228 a_begin, inca,
1229 b_begin, incb );
1230 }
1231}

References bl1_ccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

◆ bl1_cconjm()

void bl1_cconjm ( int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs 
)
24{
25 float m1 = bl1_sm1();
26 float* a_conj;
27 int lda, inca;
28 int n_iter;
29 int n_elem;
30 int j;
31
32 // Return early if possible.
33 if ( bl1_zero_dim2( m, n ) ) return;
34
35 // Handle cases where A is a vector to ensure that the underlying axpy
36 // gets invoked only once.
37 if ( bl1_is_vector( m, n ) )
38 {
39 // Initialize with values appropriate for a vector.
40 n_iter = 1;
41 n_elem = bl1_vector_dim( m, n );
42 lda = 1; // multiplied by zero when n_iter == 1; not needed.
44 }
45 else // matrix case
46 {
47 // Initialize with optimal values for column-major storage.
48 n_iter = n;
49 n_elem = m;
50 lda = a_cs;
51 inca = a_rs;
52
53 // An optimization: if A is row-major, then let's access the matrix
54 // by rows instead of by columns to increase spatial locality.
56 {
59 }
60 }
61
62 for ( j = 0; j < n_iter; ++j )
63 {
64 a_conj = ( float* )( a + j*lda ) + 1;
65
67 &m1,
68 a_conj, 2*inca );
69 }
70}
void bl1_sscal(int n, float *alpha, float *x, int incx)
Definition bl1_scal.c:13
float bl1_sm1(void)
Definition bl1_constants.c:175

References bl1_is_row_storage(), bl1_is_vector(), bl1_sm1(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), and FLA_Conjugate().

◆ bl1_cconjmr()

void bl1_cconjmr ( uplo1_t  uplo,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs 
)
24{
25 float m1 = bl1_sm1();
26 float* a_conj;
27 int lda, inca;
28 int n_iter;
29 int n_elem_max;
30 int n_elem;
31 int j;
32
33 // Return early if possible.
34 if ( bl1_zero_dim2( m, n ) ) return;
35
36 // We initialize for column-major.
37 n_iter = n;
38 n_elem_max = m;
39 lda = a_cs;
40 inca = a_rs;
41
42 // An optimization: if A is row-major, then let's access the matrix
43 // by rows instead of by columns to increase spatial locality.
45 {
48 bl1_toggle_uplo( uplo );
49 }
50
51 if ( bl1_is_upper( uplo ) )
52 {
53 for ( j = 0; j < n_iter; ++j )
54 {
55 n_elem = bl1_min( j + 1, n_elem_max );
56 a_conj = ( float* )( a + j*lda ) + 1;
57
59 &m1,
60 a_conj, 2*inca );
61 }
62 }
63 else // if ( bl1_is_lower( uplo ) )
64 {
65 for ( j = 0; j < n_iter; ++j )
66 {
67 n_elem = bl1_max( 0, n_elem_max - j );
68 a_conj = ( float* )( a + j*lda + j*inca ) + 1;
69
70 if ( n_elem <= 0 ) break;
71
73 &m1,
74 a_conj, 2*inca );
75 }
76 }
77}

References bl1_is_row_storage(), bl1_is_upper(), bl1_sm1(), bl1_sscal(), and bl1_zero_dim2().

Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), and FLA_Conjugate_r().

◆ bl1_cconjv()

void bl1_cconjv ( int  m,
scomplex * x,
int  incx 
)

◆ bl1_ccopy()

void bl1_ccopy ( int  m,
scomplex * x,
int  incx,
scomplex * y,
int  incy 
)
40{
41#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42 cblas_ccopy( m,
43 x, incx,
44 y, incy );
45#else
46 F77_ccopy( &m,
47 x, &incx,
48 y, &incy );
49#endif
50}
void F77_ccopy(int *n, scomplex *x, int *incx, scomplex *y, int *incy)
void cblas_ccopy(const int N, const void *X, const int incX, void *Y, const int incY)

References cblas_ccopy(), and F77_ccopy().

Referenced by bl1_ccopymr(), bl1_ccopymt(), bl1_ccopyv(), and FLA_SA_LU_unb().

◆ bl1_ccopymr()

void bl1_ccopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
140{
143 int lda, inca;
144 int ldb, incb;
145 int n_iter;
146 int n_elem_max;
147 int n_elem;
148 int j;
149
150 // Return early if possible.
151 if ( bl1_zero_dim2( m, n ) ) return;
152
153 // We initialize for column-major.
154 n_iter = n;
155 n_elem_max = m;
156 lda = a_cs;
157 inca = a_rs;
158 ldb = b_cs;
159 incb = b_rs;
160
161 // An optimization: if A and B are both row-major, then let's access the
162 // matrices by rows instead of by columns for increased spatial locality.
164 {
168 bl1_toggle_uplo( uplo );
169 }
170
171
172 if ( bl1_is_upper( uplo ) )
173 {
174 for ( j = 0; j < n_iter; j++ )
175 {
176 n_elem = bl1_min( j + 1, n_elem_max );
177 a_begin = a + j*lda;
178 b_begin = b + j*ldb;
179
181 a_begin, inca,
182 b_begin, incb );
183 }
184 }
185 else // if ( bl1_is_lower( uplo ) )
186 {
187 for ( j = 0; j < n_iter; j++ )
188 {
189 n_elem = bl1_max( 0, n_elem_max - j );
190 a_begin = a + j*lda + j*inca;
191 b_begin = b + j*ldb + j*incb;
192
193 if ( n_elem <= 0 ) break;
194
196 a_begin, inca,
197 b_begin, incb );
198 }
199 }
200}
void bl1_ccopy(int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copy.c:39

References bl1_ccopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_ccreate_contigmr(), bl1_cfree_saved_contigmr(), and FLA_Copyr_external().

◆ bl1_ccopymrt()

void bl1_ccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
224{
227 int lda, inca;
228 int ldb, incb;
229 int n_iter;
230 int n_elem;
231 int n_elem_max;
233 int j;
235
236 // Return early if possible.
237 if ( bl1_zero_dim2( m, n ) ) return;
238
239 // Initialize variables based on storage format of B and value of uplo.
240 if ( bl1_is_col_storage( b_rs, b_cs ) )
241 {
242 if ( bl1_is_lower( uplo ) )
243 {
244 n_iter = bl1_min( m, n );
245 n_elem_max = m;
246 lda = a_cs;
247 inca = a_rs;
248 ldb = b_cs;
249 incb = b_rs;
251 }
252 else // if ( bl1_is_upper( uplo ) )
253 {
254 n_iter = n;
255 n_elem_max = bl1_min( m, n );
256 lda = a_cs;
257 inca = a_rs;
258 ldb = b_cs;
259 incb = b_rs;
261 }
262 }
263 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
264 {
265 if ( bl1_is_lower( uplo ) )
266 {
267 n_iter = m;
268 n_elem_max = bl1_min( m, n );
269 lda = a_rs;
270 inca = a_cs;
271 ldb = b_rs;
272 incb = b_cs;
274 }
275 else // if ( bl1_is_upper( uplo ) )
276 {
277 n_iter = bl1_min( m, n );
278 n_elem_max = n;
279 lda = a_rs;
280 inca = a_cs;
281 ldb = b_rs;
282 incb = b_cs;
284 }
285 }
286
287 // Swap lda and inca if we're doing a transpose.
288 if ( bl1_does_trans( trans ) )
289 {
291 }
292
293 // Extract conj component from trans parameter.
295
296 // Choose the loop based on whether n_elem will be shrinking or growing
297 // with each iteration.
299 {
300 for ( j = 0; j < n_iter; j++ )
301 {
302 n_elem = n_elem_max - j;
303 a_begin = a + j*lda + j*inca;
304 b_begin = b + j*ldb + j*incb;
305
307 n_elem,
308 a_begin, inca,
309 b_begin, incb );
310 }
311 }
312 else // if ( n_elem_is_ascending )
313 {
314 for ( j = 0; j < n_iter; j++ )
315 {
316 n_elem = bl1_min( j + 1, n_elem_max );
317 a_begin = a + j*lda;
318 b_begin = b + j*ldb;
319
321 n_elem,
322 a_begin, inca,
323 b_begin, incb );
324 }
325 }
326}

References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

◆ bl1_ccopymt()

void bl1_ccopymt ( trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
216{
219 int lda, inca;
220 int ldb, incb;
221 int n_iter;
222 int n_elem;
223 int j;
224
225 // Return early if possible.
226 if ( bl1_zero_dim2( m, n ) ) return;
227
228 // Handle cases where A and B are vectors to ensure that the underlying copy
229 // gets invoked only once.
230 if ( bl1_is_vector( m, n ) )
231 {
232 // Initialize with values appropriate for vectors.
233 n_iter = 1;
234 n_elem = bl1_vector_dim( m, n );
235 lda = 1; // multiplied by zero when n_iter == 1; not needed.
236 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
237 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
239 }
240 else // matrix case
241 {
242 // Initialize with optimal values for column-major storage.
243 n_iter = n;
244 n_elem = m;
245 lda = a_cs;
246 inca = a_rs;
247 ldb = b_cs;
248 incb = b_rs;
249
250 // Handle the transposition of A.
251 if ( bl1_does_trans( trans ) )
252 {
254 }
255
256 // An optimization: if B is row-major and if A is effectively row-major
257 // after a possible transposition, then let's access the matrix by rows
258 // instead of by columns for increased spatial locality.
259 if ( bl1_is_row_storage( b_rs, b_cs ) )
260 {
261 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
263 {
267 }
268 }
269 }
270
271 for ( j = 0; j < n_iter; j++ )
272 {
273 a_begin = a + j*lda;
274 b_begin = b + j*ldb;
275
277 a_begin, inca,
278 b_begin, incb );
279
280 if ( bl1_does_conj( trans ) )
282 b_begin, incb );
283 }
284}
void bl1_cconjv(int m, scomplex *x, int incx)
Definition bl1_conjv.c:23

References bl1_cconjv(), bl1_ccopy(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree_saved_contigm(), bl1_cfree_saved_contigmsr(), bl1_cgemm(), bl1_chemm(), bl1_cher2k(), bl1_csymm(), bl1_csyr2k(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_ccopyv()

void bl1_ccopyv ( conj1_t  conj,
int  m,
scomplex * x,
int  incx,
scomplex * y,
int  incy 
)

◆ bl1_cdcopymr()

void bl1_cdcopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
855{
857 double* b_begin;
858 int lda, inca;
859 int ldb, incb;
860 int n_iter;
861 int n_elem_max;
862 int n_elem;
863 int j;
864
865 // Return early if possible.
866 if ( bl1_zero_dim2( m, n ) ) return;
867
868 // We initialize for column-major.
869 n_iter = n;
870 n_elem_max = m;
871 lda = a_cs;
872 inca = a_rs;
873 ldb = b_cs;
874 incb = b_rs;
875
876 // An optimization: if B is row-major, then let's access the matrix
877 // by rows instead of by columns for increased spatial locality.
878 if ( bl1_is_row_storage( b_rs, b_cs ) )
879 {
883 bl1_toggle_uplo( uplo );
884 }
885
886
887 if ( bl1_is_upper( uplo ) )
888 {
889 for ( j = 0; j < n_iter; j++ )
890 {
891 n_elem = bl1_min( j + 1, n_elem_max );
892 a_begin = a + j*lda;
893 b_begin = b + j*ldb;
894
896 n_elem,
897 a_begin, inca,
898 b_begin, incb );
899 }
900 }
901 else // if ( bl1_is_lower( uplo ) )
902 {
903 for ( j = 0; j < n_iter; j++ )
904 {
905 n_elem = bl1_max( 0, n_elem_max - j );
906 a_begin = a + j*lda + j*inca;
907 b_begin = b + j*ldb + j*incb;
908
909 if ( n_elem <= 0 ) break;
910
912 n_elem,
913 a_begin, inca,
914 b_begin, incb );
915 }
916 }
917}
void bl1_cdcopyv(conj1_t conj, int m, scomplex *x, int incx, double *y, int incy)
Definition bl1_copyv.c:236

References bl1_cdcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_cdcopymrt()

void bl1_cdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
1391{
1393 double* b_begin;
1394 int lda, inca;
1395 int ldb, incb;
1396 int n_iter;
1397 int n_elem;
1398 int n_elem_max;
1400 int j;
1401 conj1_t conj;
1402
1403 // Return early if possible.
1404 if ( bl1_zero_dim2( m, n ) ) return;
1405
1406 // Initialize variables based on storage format of B and value of uplo.
1407 if ( bl1_is_col_storage( b_rs, b_cs ) )
1408 {
1409 if ( bl1_is_lower( uplo ) )
1410 {
1411 n_iter = bl1_min( m, n );
1412 n_elem_max = m;
1413 lda = a_cs;
1414 inca = a_rs;
1415 ldb = b_cs;
1416 incb = b_rs;
1418 }
1419 else // if ( bl1_is_upper( uplo ) )
1420 {
1421 n_iter = n;
1422 n_elem_max = bl1_min( m, n );
1423 lda = a_cs;
1424 inca = a_rs;
1425 ldb = b_cs;
1426 incb = b_rs;
1428 }
1429 }
1430 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1431 {
1432 if ( bl1_is_lower( uplo ) )
1433 {
1434 n_iter = m;
1435 n_elem_max = bl1_min( m, n );
1436 lda = a_rs;
1437 inca = a_cs;
1438 ldb = b_rs;
1439 incb = b_cs;
1441 }
1442 else // if ( bl1_is_upper( uplo ) )
1443 {
1444 n_iter = bl1_min( m, n );
1445 n_elem_max = n;
1446 lda = a_rs;
1447 inca = a_cs;
1448 ldb = b_rs;
1449 incb = b_cs;
1451 }
1452 }
1453
1454 // Swap lda and inca if we're doing a transpose.
1455 if ( bl1_does_trans( trans ) )
1456 {
1458 }
1459
1460 // Extract conj component from trans parameter.
1462
1463 // Choose the loop based on whether n_elem will be shrinking or growing
1464 // with each iteration.
1466 {
1467 for ( j = 0; j < n_iter; j++ )
1468 {
1469 n_elem = n_elem_max - j;
1470 a_begin = a + j*lda + j*inca;
1471 b_begin = b + j*ldb + j*incb;
1472
1474 n_elem,
1475 a_begin, inca,
1476 b_begin, incb );
1477 }
1478 }
1479 else // if ( n_elem_is_ascending )
1480 {
1481 for ( j = 0; j < n_iter; j++ )
1482 {
1483 n_elem = bl1_min( j + 1, n_elem_max );
1484 a_begin = a + j*lda;
1485 b_begin = b + j*ldb;
1486
1488 n_elem,
1489 a_begin, inca,
1490 b_begin, incb );
1491 }
1492 }
1493}

References bl1_cdcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_cdcopymt()

void bl1_cdcopymt ( trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
965{
967 double* b_begin;
968 int lda, inca;
969 int ldb, incb;
970 int n_iter;
971 int n_elem;
972 int j;
974
975 // Return early if possible.
976 if ( bl1_zero_dim2( m, n ) ) return;
977
978 // Handle cases where A and B are vectors to ensure that the underlying copy
979 // gets invoked only once.
980 if ( bl1_is_vector( m, n ) )
981 {
982 // Initialize with values appropriate for vectors.
983 n_iter = 1;
984 n_elem = bl1_vector_dim( m, n );
985 lda = 1; // multiplied by zero when n_iter == 1; not needed.
986 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
987 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
989 }
990 else // matrix case
991 {
992 // Initialize with optimal values for column-major storage of B.
993 n_iter = n;
994 n_elem = m;
995 lda = a_cs;
996 inca = a_rs;
997 ldb = b_cs;
998 incb = b_rs;
999
1000 // Handle the transposition of A.
1001 if ( bl1_does_trans( trans ) )
1002 {
1004 }
1005
1006 // An optimization: if B is row-major, then let's access the matrix by rows
1007 // instead of by columns for increased spatial locality.
1008 if ( bl1_is_row_storage( b_rs, b_cs ) )
1009 {
1013 }
1014 }
1015
1016 // Extract conj component from trans parameter.
1018
1019 for ( j = 0; j < n_iter; ++j )
1020 {
1021 a_begin = a + j*lda;
1022 b_begin = b + j*ldb;
1023
1025 n_elem,
1026 a_begin, inca,
1027 b_begin, incb );
1028 }
1029}

References bl1_cdcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_cdcopyv()

void bl1_cdcopyv ( conj1_t  conj,
int  m,
scomplex * x,
int  incx,
double * y,
int  incy 
)
237{
238 scomplex* chi;
239 double* psi;
240 int i;
241
242 // Return early if possible.
243 if ( bl1_zero_dim1( m ) ) return;
244
245 // Initialize pointers.
246 chi = x;
247 psi = y;
248
249 for ( i = 0; i < m; ++i )
250 {
251 *psi = chi->real;
252
253 chi += incx;
254 psi += incy;
255 }
256}

References bl1_zero_dim1(), i, and scomplex::real.

Referenced by bl1_cdcopymr(), bl1_cdcopymrt(), and bl1_cdcopymt().

◆ bl1_cdot()

void bl1_cdot ( conj1_t  conj,
int  n,
scomplex * x,
int  incx,
scomplex * y,
int  incy,
scomplex * rho 
)
40{
41#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42 if ( bl1_is_conj( conj ) )
43 {
45 x, incx,
46 y, incy,
47 rho );
48 }
49 else // if ( !bl1_is_conj( conj ) )
50 {
52 x, incx,
53 y, incy,
54 rho );
55 }
56#else
58 n,
59 x, incx,
60 y, incy,
61 rho );
62#endif
63}
* rho
Definition bl1_axpyv2bdotaxpy.c:322
void bl1_cdot_in(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:94
void cblas_cdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc)
void cblas_cdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu)

References bl1_cdot_in(), bl1_is_conj(), cblas_cdotc_sub(), cblas_cdotu_sub(), and rho.

Referenced by bl1_cdot2s(), bl1_cdots(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Sylv_hh_opc_var1(), FLA_Sylv_hn_opc_var1(), FLA_Sylv_nh_opc_var1(), FLA_Sylv_nn_opc_var1(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().

◆ bl1_cdot2s()

void bl1_cdot2s ( conj1_t  conj,
int  n,
scomplex * alpha,
scomplex * x,
int  incx,
scomplex * y,
int  incy,
scomplex * beta,
scomplex * rho 
)
40{
47
48 alphac_d.imag *= -1.0F;
49
51 n,
52 x, incx,
53 y, incy,
54 &dotxy );
55
57 n,
58 y, incy,
59 x, incx,
60 &dotyx );
61
62 rho->real = beta_d.real * rho_d.real - beta_d.imag * rho_d.imag +
63 alpha_d.real * dotxy.real - alpha_d.imag * dotxy.imag +
64 alphac_d.real * dotyx.real - alphac_d.imag * dotyx.imag;
65 rho->imag = beta_d.real * rho_d.imag + beta_d.imag * rho_d.real +
66 alpha_d.real * dotxy.imag + alpha_d.imag * dotxy.real +
67 alphac_d.real * dotyx.imag + alphac_d.imag * dotyx.real;
68}
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
float imag
Definition blis_type_defs.h:134

References bl1_cdot(), scomplex::imag, scomplex::real, and rho.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), and FLA_Lyap_n_opc_var3().

◆ bl1_cdot_in()

void bl1_cdot_in ( conj1_t  conj,
int  n,
scomplex * x,
int  incx,
scomplex * y,
int  incy,
scomplex * rho 
)
95{
101 int i;
102
103 rho_temp.real = 0.0F;
104 rho_temp.imag = 0.0F;
105
106 xip = x;
107 yip = y;
108
109 if ( bl1_is_conj( conj ) )
110 {
111 for ( i = 0; i < n; ++i )
112 {
113 xi.real = xip->real;
114 xi.imag = xip->imag;
115 yi.real = yip->real;
116 yi.imag = yip->imag;
117
118 rho_temp.real += xi.real * yi.real - -xi.imag * yi.imag;
119 rho_temp.imag += xi.real * yi.imag + -xi.imag * yi.real;
120
121 xip += incx;
122 yip += incy;
123 }
124 }
125 else // if ( !bl1_is_conj( conj ) )
126 {
127 for ( i = 0; i < n; ++i )
128 {
129 xi.real = xip->real;
130 xi.imag = xip->imag;
131 yi.real = yip->real;
132 yi.imag = yip->imag;
133
134 rho_temp.real += xi.real * yi.real - xi.imag * yi.imag;
135 rho_temp.imag += xi.real * yi.imag + xi.imag * yi.real;
136
137 xip += incx;
138 yip += incy;
139 }
140 }
141
142 rho->real = rho_temp.real;
143 rho->imag = rho_temp.imag;
144}

References bl1_is_conj(), i, scomplex::imag, scomplex::real, and rho.

Referenced by bl1_cdot().

◆ bl1_cdots()

void bl1_cdots ( conj1_t  conj,
int  n,
scomplex * alpha,
scomplex * x,
int  incx,
scomplex * y,
int  incy,
scomplex * beta,
scomplex * rho 
)

◆ bl1_cfnorm()

void bl1_cfnorm ( int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
float * norm 
)
122{
123 scomplex* a_ij;
124 float sum;
125 int lda, inca;
126 int n_iter;
127 int n_elem;
128 int i, j;
129
130 // Return early if possible.
131 if ( bl1_zero_dim2( m, n ) ) return;
132
133 // Handle cases where A is a vector separately.
134 if ( bl1_is_vector( m, n ) )
135 {
136 // Initialize with values appropriate for vectors.
137 n_iter = 1;
138 n_elem = bl1_vector_dim( m, n );
139 lda = 1; // multiplied by zero when n_iter == 1; not needed.
141 }
142 else // matrix case
143 {
144 // Initialize with optimal values for column-major storage.
145 n_iter = n;
146 n_elem = m;
147 lda = a_cs;
148 inca = a_rs;
149
150 // An optimization: if A is row-major, then let's access the matrix by
151 // rows instead of by columns for increased spatial locality.
152 if ( bl1_is_row_storage( a_rs, a_cs ) )
153 {
156 }
157 }
158
159 // Initialize the accumulator variable.
160 sum = 0.0F;
161
162 for ( j = 0; j < n_iter; j++ )
163 {
164 for ( i = 0; i < n_elem; i++ )
165 {
166 a_ij = a + i*inca + j*lda;
167 sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
168 }
169 }
170
171 // Compute the norm and store the result.
172 *norm = ( float ) sqrt( sum );
173}

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_Norm_frob().

◆ bl1_cinvscalm()

void bl1_cinvscalm ( conj1_t  conj,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs 
)
170{
173 int lda, inca;
174 int n_iter;
175 int n_elem;
176 int j;
177
178 // Return early if possible.
179 if ( bl1_zero_dim2( m, n ) ) return;
180 if ( bl1_ceq1( alpha ) ) return;
181
182 // Handle cases where A is a vector to ensure that the underlying axpy
183 // gets invoked only once.
184 if ( bl1_is_vector( m, n ) )
185 {
186 // Initialize with values appropriate for a vector.
187 n_iter = 1;
188 n_elem = bl1_vector_dim( m, n );
189 lda = 1; // multiplied by zero when n_iter == 1; not needed.
191 }
192 else // matrix case
193 {
194 // Initialize with optimal values for column-major storage.
195 n_iter = n;
196 n_elem = m;
197 lda = a_cs;
198 inca = a_rs;
199
200 // An optimization: if A is row-major, then let's access the matrix
201 // by rows instead of by columns to increase spatial locality.
202 if ( bl1_is_row_storage( a_rs, a_cs ) )
203 {
206 }
207 }
208
210
211 for ( j = 0; j < n_iter; j++ )
212 {
213 a_begin = a + j*lda;
214
216 &alpha_inv,
217 a_begin, inca );
218 }
219}
void bl1_cinvert2s(conj1_t conj, scomplex *alpha, scomplex *beta)
Definition bl1_invert2s.c:27

References bl1_cinvert2s(), bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_cinvscalv()

void bl1_cinvscalv ( conj1_t  conj,
int  n,
scomplex * alpha,
scomplex * x,
int  incx 
)

◆ bl1_cnrm2()

void bl1_cnrm2 ( int  n,
scomplex * x,
int  incx,
float * norm 
)
36{
37#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
38 *norm = cblas_scnrm2( n,
39 x, incx );
40#else
41 *norm = F77_scnrm2( &n,
42 x, &incx );
43#endif
44}
float F77_scnrm2(int *n, scomplex *x, int *incx)
float cblas_scnrm2(const int N, const void *X, const int incX)

References cblas_scnrm2(), and F77_scnrm2().

Referenced by FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_l_opc(), FLA_Househ3UD_UT_opc(), and FLA_Nrm2_external().

◆ bl1_cscal()

void bl1_cscal ( int  n,
scomplex * alpha,
scomplex * x,
int  incx 
)
53{
54#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
55 cblas_cscal( n,
56 alpha,
57 x, incx );
58#else
59 F77_cscal( &n,
60 alpha,
61 x, &incx );
62#endif
63}
void F77_cscal(int *n, scomplex *alpha, scomplex *y, int *incy)
void cblas_cscal(const int N, const void *alpha, void *X, const int incX)

References cblas_cscal(), and F77_cscal().

Referenced by bl1_caxpysmt(), bl1_caxpysv(), bl1_cinvscalm(), bl1_cinvscalv(), bl1_cscalm(), bl1_cscalmr(), bl1_cscalv(), and FLA_SA_LU_unb().

◆ bl1_cscalm()

void bl1_cscalm ( conj1_t  conj,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs 
)
170{
173 int lda, inca;
174 int n_iter;
175 int n_elem;
176 int j;
177
178 // Return early if possible.
179 if ( bl1_zero_dim2( m, n ) ) return;
180 if ( bl1_ceq1( alpha ) ) return;
181
182 // Handle cases where A is a vector to ensure that the underlying axpy
183 // gets invoked only once.
184 if ( bl1_is_vector( m, n ) )
185 {
186 // Initialize with values appropriate for a vector.
187 n_iter = 1;
188 n_elem = bl1_vector_dim( m, n );
189 lda = 1; // multiplied by zero when n_iter == 1; not needed.
191 }
192 else // matrix case
193 {
194 // Initialize with optimal values for column-major storage.
195 n_iter = n;
196 n_elem = m;
197 lda = a_cs;
198 inca = a_rs;
199
200 // An optimization: if A is row-major, then let's access the matrix
201 // by rows instead of by columns to increase spatial locality.
202 if ( bl1_is_row_storage( a_rs, a_cs ) )
203 {
206 }
207 }
208
210
211 for ( j = 0; j < n_iter; j++ )
212 {
213 a_begin = a + j*lda;
214
216 &alpha_conj,
217 a_begin, inca );
218 }
219}

References bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_Lyap_n_opc_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_cscalmr()

void bl1_cscalmr ( uplo1_t  uplo,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs 
)
182{
184 int lda, inca;
185 int n_iter;
186 int n_elem_max;
187 int n_elem;
188 int j;
189
190 // Return early if possible.
191 if ( bl1_zero_dim2( m, n ) ) return;
192 if ( bl1_ceq1( alpha ) ) return;
193
194 // We initialize for column-major.
195 n_iter = n;
196 n_elem_max = m;
197 lda = a_cs;
198 inca = a_rs;
199
200 // An optimization: if A is row-major, then let's access the matrix
201 // by rows instead of by columns to increase spatial locality.
202 if ( bl1_is_row_storage( a_rs, a_cs ) )
203 {
206 bl1_toggle_uplo( uplo );
207 }
208
209 if ( bl1_is_upper( uplo ) )
210 {
211 for ( j = 0; j < n_iter; j++ )
212 {
213 n_elem = bl1_min( j + 1, n_elem_max );
214 a_begin = a + j*lda;
215
217 alpha,
218 a_begin, inca );
219 }
220 }
221 else // if ( bl1_is_lower( uplo ) )
222 {
223 for ( j = 0; j < n_iter; j++ )
224 {
225 n_elem = bl1_max( 0, n_elem_max - j );
226 a_begin = a + j*lda + j*inca;
227
228 if ( n_elem <= 0 ) break;
229
231 alpha,
232 a_begin, inca );
233 }
234 }
235}

References bl1_cscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

◆ bl1_cscalv()

void bl1_cscalv ( conj1_t  conj,
int  n,
scomplex * alpha,
scomplex * x,
int  incx 
)

◆ bl1_cscopymr()

void bl1_cscopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
529{
531 float* b_begin;
532 int lda, inca;
533 int ldb, incb;
534 int n_iter;
535 int n_elem_max;
536 int n_elem;
537 int j;
538
539 // Return early if possible.
540 if ( bl1_zero_dim2( m, n ) ) return;
541
542 // We initialize for column-major.
543 n_iter = n;
544 n_elem_max = m;
545 lda = a_cs;
546 inca = a_rs;
547 ldb = b_cs;
548 incb = b_rs;
549
550 // An optimization: if B is row-major, then let's access the matrix
551 // by rows instead of by columns for increased spatial locality.
552 if ( bl1_is_row_storage( b_rs, b_cs ) )
553 {
557 bl1_toggle_uplo( uplo );
558 }
559
560
561 if ( bl1_is_upper( uplo ) )
562 {
563 for ( j = 0; j < n_iter; j++ )
564 {
565 n_elem = bl1_min( j + 1, n_elem_max );
566 a_begin = a + j*lda;
567 b_begin = b + j*ldb;
568
570 n_elem,
571 a_begin, inca,
572 b_begin, incb );
573 }
574 }
575 else // if ( bl1_is_lower( uplo ) )
576 {
577 for ( j = 0; j < n_iter; j++ )
578 {
579 n_elem = bl1_max( 0, n_elem_max - j );
580 a_begin = a + j*lda + j*inca;
581 b_begin = b + j*ldb + j*incb;
582
583 if ( n_elem <= 0 ) break;
584
586 n_elem,
587 a_begin, inca,
588 b_begin, incb );
589 }
590 }
591}
void bl1_cscopyv(conj1_t conj, int m, scomplex *x, int incx, float *y, int incy)
Definition bl1_copyv.c:146

References bl1_cscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_cscopymrt()

void bl1_cscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
1285{
1287 float* b_begin;
1288 int lda, inca;
1289 int ldb, incb;
1290 int n_iter;
1291 int n_elem;
1292 int n_elem_max;
1294 int j;
1295 conj1_t conj;
1296
1297 // Return early if possible.
1298 if ( bl1_zero_dim2( m, n ) ) return;
1299
1300 // Initialize variables based on storage format of B and value of uplo.
1301 if ( bl1_is_col_storage( b_rs, b_cs ) )
1302 {
1303 if ( bl1_is_lower( uplo ) )
1304 {
1305 n_iter = bl1_min( m, n );
1306 n_elem_max = m;
1307 lda = a_cs;
1308 inca = a_rs;
1309 ldb = b_cs;
1310 incb = b_rs;
1312 }
1313 else // if ( bl1_is_upper( uplo ) )
1314 {
1315 n_iter = n;
1316 n_elem_max = bl1_min( m, n );
1317 lda = a_cs;
1318 inca = a_rs;
1319 ldb = b_cs;
1320 incb = b_rs;
1322 }
1323 }
1324 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1325 {
1326 if ( bl1_is_lower( uplo ) )
1327 {
1328 n_iter = m;
1329 n_elem_max = bl1_min( m, n );
1330 lda = a_rs;
1331 inca = a_cs;
1332 ldb = b_rs;
1333 incb = b_cs;
1335 }
1336 else // if ( bl1_is_upper( uplo ) )
1337 {
1338 n_iter = bl1_min( m, n );
1339 n_elem_max = n;
1340 lda = a_rs;
1341 inca = a_cs;
1342 ldb = b_rs;
1343 incb = b_cs;
1345 }
1346 }
1347
1348 // Swap lda and inca if we're doing a transpose.
1349 if ( bl1_does_trans( trans ) )
1350 {
1352 }
1353
1354 // Extract conj component from trans parameter.
1356
1357 // Choose the loop based on whether n_elem will be shrinking or growing
1358 // with each iteration.
1360 {
1361 for ( j = 0; j < n_iter; j++ )
1362 {
1363 n_elem = n_elem_max - j;
1364 a_begin = a + j*lda + j*inca;
1365 b_begin = b + j*ldb + j*incb;
1366
1368 n_elem,
1369 a_begin, inca,
1370 b_begin, incb );
1371 }
1372 }
1373 else // if ( n_elem_is_ascending )
1374 {
1375 for ( j = 0; j < n_iter; j++ )
1376 {
1377 n_elem = bl1_min( j + 1, n_elem_max );
1378 a_begin = a + j*lda;
1379 b_begin = b + j*ldb;
1380
1382 n_elem,
1383 a_begin, inca,
1384 b_begin, incb );
1385 }
1386 }
1387}

References bl1_cscopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_cscopymt()

void bl1_cscopymt ( trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
629{
631 float* b_begin;
632 int lda, inca;
633 int ldb, incb;
634 int n_iter;
635 int n_elem;
636 int j;
638
639 // Return early if possible.
640 if ( bl1_zero_dim2( m, n ) ) return;
641
642 // Handle cases where A and B are vectors to ensure that the underlying copy
643 // gets invoked only once.
644 if ( bl1_is_vector( m, n ) )
645 {
646 // Initialize with values appropriate for vectors.
647 n_iter = 1;
648 n_elem = bl1_vector_dim( m, n );
649 lda = 1; // multiplied by zero when n_iter == 1; not needed.
650 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
651 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
653 }
654 else // matrix case
655 {
656 // Initialize with optimal values for column-major storage of B.
657 n_iter = n;
658 n_elem = m;
659 lda = a_cs;
660 inca = a_rs;
661 ldb = b_cs;
662 incb = b_rs;
663
664 // Handle the transposition of A.
665 if ( bl1_does_trans( trans ) )
666 {
668 }
669
670 // An optimization: if B is row-major, then let's access the matrix by rows
671 // instead of by columns for increased spatial locality.
672 if ( bl1_is_row_storage( b_rs, b_cs ) )
673 {
677 }
678 }
679
680 // Extract conj component from trans parameter.
682
683 for ( j = 0; j < n_iter; ++j )
684 {
685 a_begin = a + j*lda;
686 b_begin = b + j*ldb;
687
689 n_elem,
690 a_begin, inca,
691 b_begin, incb );
692 }
693}

References bl1_cscopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_cscopyv()

void bl1_cscopyv ( conj1_t  conj,
int  m,
scomplex * x,
int  incx,
float * y,
int  incy 
)
147{
148 scomplex* chi;
149 float* psi;
150 int i;
151
152 // Return early if possible.
153 if ( bl1_zero_dim1( m ) ) return;
154
155 // Initialize pointers.
156 chi = x;
157 psi = y;
158
159 for ( i = 0; i < m; ++i )
160 {
161 *psi = chi->real;
162
163 chi += incx;
164 psi += incy;
165 }
166}

References bl1_zero_dim1(), i, and scomplex::real.

Referenced by bl1_cscopymr(), bl1_cscopymrt(), and bl1_cscopymt().

◆ bl1_csinvscalm()

void bl1_csinvscalm ( conj1_t  conj,
int  m,
int  n,
float * alpha,
scomplex * a,
int  a_rs,
int  a_cs 
)
118{
119 float alpha_inv;
121 int lda, inca;
122 int n_iter;
123 int n_elem;
124 int j;
125
126 // Return early if possible.
127 if ( bl1_zero_dim2( m, n ) ) return;
128 if ( bl1_seq1( alpha ) ) return;
129
130 // Handle cases where A is a vector to ensure that the underlying axpy
131 // gets invoked only once.
132 if ( bl1_is_vector( m, n ) )
133 {
134 // Initialize with values appropriate for a vector.
135 n_iter = 1;
136 n_elem = bl1_vector_dim( m, n );
137 lda = 1; // multiplied by zero when n_iter == 1; not needed.
139 }
140 else // matrix case
141 {
142 // Initialize with optimal values for column-major storage.
143 n_iter = n;
144 n_elem = m;
145 lda = a_cs;
146 inca = a_rs;
147
148 // An optimization: if A is row-major, then let's access the matrix
149 // by rows instead of by columns to increase spatial locality.
150 if ( bl1_is_row_storage( a_rs, a_cs ) )
151 {
154 }
155 }
156
158
159 for ( j = 0; j < n_iter; j++ )
160 {
161 a_begin = a + j*lda;
162
164 &alpha_inv,
165 a_begin, inca );
166 }
167}
void bl1_csscal(int n, float *alpha, scomplex *x, int incx)
Definition bl1_scal.c:39
void bl1_sinvert2s(conj1_t conj, float *alpha, float *beta)
Definition bl1_invert2s.c:13

References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_csinvscalv()

void bl1_csinvscalv ( conj1_t  conj,
int  n,
float * alpha,
scomplex * x,
int  incx 
)
40{
41 float alpha_inv;
42
43 if ( bl1_seq1( alpha ) ) return;
44
45 alpha_inv = 1.0F / *alpha;
46
47 bl1_csscal( n,
48 &alpha_inv,
49 x, incx );
50}

References bl1_csscal().

◆ bl1_csscal()

void bl1_csscal ( int  n,
float * alpha,
scomplex * x,
int  incx 
)
40{
41#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42 cblas_csscal( n,
43 *alpha,
44 x, incx );
45#else
46 F77_csscal( &n,
47 alpha,
48 x, &incx );
49#endif
50}
void F77_csscal(int *n, float *alpha, scomplex *y, int *incy)
void cblas_csscal(const int N, const float alpha, void *X, const int incX)

References cblas_csscal(), and F77_csscal().

Referenced by bl1_csinvscalm(), bl1_csinvscalv(), bl1_csscalm(), bl1_csscalmr(), and bl1_csscalv().

◆ bl1_csscalm()

void bl1_csscalm ( conj1_t  conj,
int  m,
int  n,
float * alpha,
scomplex * a,
int  a_rs,
int  a_cs 
)
118{
119 float alpha_conj;
121 int lda, inca;
122 int n_iter;
123 int n_elem;
124 int j;
125
126 // Return early if possible.
127 if ( bl1_zero_dim2( m, n ) ) return;
128 if ( bl1_seq1( alpha ) ) return;
129
130 // Handle cases where A is a vector to ensure that the underlying axpy
131 // gets invoked only once.
132 if ( bl1_is_vector( m, n ) )
133 {
134 // Initialize with values appropriate for a vector.
135 n_iter = 1;
136 n_elem = bl1_vector_dim( m, n );
137 lda = 1; // multiplied by zero when n_iter == 1; not needed.
139 }
140 else // matrix case
141 {
142 // Initialize with optimal values for column-major storage.
143 n_iter = n;
144 n_elem = m;
145 lda = a_cs;
146 inca = a_rs;
147
148 // An optimization: if A is row-major, then let's access the matrix
149 // by rows instead of by columns to increase spatial locality.
150 if ( bl1_is_row_storage( a_rs, a_cs ) )
151 {
154 }
155 }
156
158
159 for ( j = 0; j < n_iter; j++ )
160 {
161 a_begin = a + j*lda;
162
164 &alpha_conj,
165 a_begin, inca );
166 }
167}

References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_csscalmr()

void bl1_csscalmr ( uplo1_t  uplo,
int  m,
int  n,
float * alpha,
scomplex * a,
int  a_rs,
int  a_cs 
)
126{
128 int lda, inca;
129 int n_iter;
130 int n_elem_max;
131 int n_elem;
132 int j;
133
134 // Return early if possible.
135 if ( bl1_zero_dim2( m, n ) ) return;
136 if ( bl1_seq1( alpha ) ) return;
137
138 // We initialize for column-major.
139 n_iter = n;
140 n_elem_max = m;
141 lda = a_cs;
142 inca = a_rs;
143
144 // An optimization: if A is row-major, then let's access the matrix
145 // by rows instead of by columns to increase spatial locality.
146 if ( bl1_is_row_storage( a_rs, a_cs ) )
147 {
150 bl1_toggle_uplo( uplo );
151 }
152
153 if ( bl1_is_upper( uplo ) )
154 {
155 for ( j = 0; j < n_iter; j++ )
156 {
157 n_elem = bl1_min( j + 1, n_elem_max );
158 a_begin = a + j*lda;
159
161 alpha,
162 a_begin, inca );
163 }
164 }
165 else // if ( bl1_is_lower( uplo ) )
166 {
167 for ( j = 0; j < n_iter; j++ )
168 {
169 n_elem = bl1_max( 0, n_elem_max - j );
170 a_begin = a + j*lda + j*inca;
171
172 if ( n_elem <= 0 ) break;
173
175 alpha,
176 a_begin, inca );
177 }
178 }
179}

References bl1_csscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Scalr_external().

◆ bl1_csscalv()

void bl1_csscalv ( conj1_t  conj,
int  n,
float * alpha,
scomplex * x,
int  incx 
)
36{
37 // Return early if possible.
38 if ( bl1_zero_dim1( n ) ) return;
39 if ( bl1_seq1( alpha ) ) return;
40
41 bl1_csscal( n,
42 alpha,
43 x, incx );
44}

References bl1_csscal(), and bl1_zero_dim1().

Referenced by bl1_csapdiagmv(), FLA_Bsvd_ext_opc_var1(), and FLA_Bsvd_v_opc_var1().

◆ bl1_cswap()

void bl1_cswap ( int  n,
scomplex * x,
int  incx,
scomplex * y,
int  incy 
)
40{
41#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42 cblas_cswap( n,
43 x, incx,
44 y, incy );
45#else
46 F77_cswap( &n,
47 x, &incx,
48 y, &incy );
49#endif
50}
void F77_cswap(int *n, scomplex *x, int *incx, scomplex *y, int *incy)
void cblas_cswap(const int N, void *X, const int incX, void *Y, const int incY)

References cblas_cswap(), and F77_cswap().

Referenced by bl1_cswapmt(), bl1_cswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_cswapmt()

void bl1_cswapmt ( trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
148{
151 int lda, inca;
152 int ldb, incb;
153 int n_iter;
154 int n_elem;
155 int j;
156
157 // Return early if possible.
158 if ( bl1_zero_dim2( m, n ) ) return;
159
160 // Handle cases where A and B are vectors to ensure that the underlying copy
161 // gets invoked only once.
162 if ( bl1_is_vector( m, n ) )
163 {
164 // Initialize with values appropriate for vectors.
165 n_iter = 1;
166 n_elem = bl1_vector_dim( m, n );
167 lda = 1; // multiplied by zero when n_iter == 1; not needed.
168 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
169 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
171 }
172 else // matrix case
173 {
174 // Initialize with optimal values for column-major storage.
175 n_iter = n;
176 n_elem = m;
177 lda = a_cs;
178 inca = a_rs;
179 ldb = b_cs;
180 incb = b_rs;
181
182 // Handle the transposition of A.
183 if ( bl1_does_trans( trans ) )
184 {
186 }
187
188 // An optimization: if B is row-major and if A is effectively row-major
189 // after a possible transposition, then let's access the matrix by rows
190 // instead of by columns for increased spatial locality.
191 if ( bl1_is_row_storage( b_rs, b_cs ) )
192 {
193 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
195 {
199 }
200 }
201 }
202
203 for ( j = 0; j < n_iter; j++ )
204 {
205 a_begin = a + j*lda;
206 b_begin = b + j*ldb;
207
209 a_begin, inca,
210 b_begin, incb );
211
212 if ( bl1_does_conj( trans ) )
214 a_begin, inca );
215
216 if ( bl1_does_conj( trans ) )
218 b_begin, incb );
219 }
220}
void bl1_cswap(int n, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_swap.c:39

References bl1_cconjv(), bl1_cswap(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_cswapv()

void bl1_cswapv ( int  n,
scomplex * x,
int  incx,
scomplex * y,
int  incy 
)
34{
35 // Return early if possible.
36 if ( bl1_zero_dim1( n ) ) return;
37
38 bl1_cswap( n,
39 x, incx,
40 y, incy );
41}

References bl1_cswap(), and bl1_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_opc(), and FLA_Sort_bsvd_ext_f_opc().

◆ bl1_czcopymr()

void bl1_czcopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
1117{
1120 int lda, inca;
1121 int ldb, incb;
1122 int n_iter;
1123 int n_elem_max;
1124 int n_elem;
1125 int j;
1126
1127 // Return early if possible.
1128 if ( bl1_zero_dim2( m, n ) ) return;
1129
1130 // We initialize for column-major.
1131 n_iter = n;
1132 n_elem_max = m;
1133 lda = a_cs;
1134 inca = a_rs;
1135 ldb = b_cs;
1136 incb = b_rs;
1137
1138 // An optimization: if B is row-major, then let's access the matrix
1139 // by rows instead of by columns for increased spatial locality.
1140 if ( bl1_is_row_storage( b_rs, b_cs ) )
1141 {
1145 bl1_toggle_uplo( uplo );
1146 }
1147
1148
1149 if ( bl1_is_upper( uplo ) )
1150 {
1151 for ( j = 0; j < n_iter; j++ )
1152 {
1153 n_elem = bl1_min( j + 1, n_elem_max );
1154 a_begin = a + j*lda;
1155 b_begin = b + j*ldb;
1156
1158 n_elem,
1159 a_begin, inca,
1160 b_begin, incb );
1161 }
1162 }
1163 else // if ( bl1_is_lower( uplo ) )
1164 {
1165 for ( j = 0; j < n_iter; j++ )
1166 {
1167 n_elem = bl1_max( 0, n_elem_max - j );
1168 a_begin = a + j*lda + j*inca;
1169 b_begin = b + j*ldb + j*incb;
1170
1171 if ( n_elem <= 0 ) break;
1172
1174 n_elem,
1175 a_begin, inca,
1176 b_begin, incb );
1177 }
1178 }
1179}
void bl1_czcopyv(conj1_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:304
Definition blis_type_defs.h:138

References bl1_czcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_czcopymrt()

void bl1_czcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
1603{
1606 int lda, inca;
1607 int ldb, incb;
1608 int n_iter;
1609 int n_elem;
1610 int n_elem_max;
1612 int j;
1613 conj1_t conj;
1614
1615 // Return early if possible.
1616 if ( bl1_zero_dim2( m, n ) ) return;
1617
1618 // Initialize variables based on storage format of B and value of uplo.
1619 if ( bl1_is_col_storage( b_rs, b_cs ) )
1620 {
1621 if ( bl1_is_lower( uplo ) )
1622 {
1623 n_iter = bl1_min( m, n );
1624 n_elem_max = m;
1625 lda = a_cs;
1626 inca = a_rs;
1627 ldb = b_cs;
1628 incb = b_rs;
1630 }
1631 else // if ( bl1_is_upper( uplo ) )
1632 {
1633 n_iter = n;
1634 n_elem_max = bl1_min( m, n );
1635 lda = a_cs;
1636 inca = a_rs;
1637 ldb = b_cs;
1638 incb = b_rs;
1640 }
1641 }
1642 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1643 {
1644 if ( bl1_is_lower( uplo ) )
1645 {
1646 n_iter = m;
1647 n_elem_max = bl1_min( m, n );
1648 lda = a_rs;
1649 inca = a_cs;
1650 ldb = b_rs;
1651 incb = b_cs;
1653 }
1654 else // if ( bl1_is_upper( uplo ) )
1655 {
1656 n_iter = bl1_min( m, n );
1657 n_elem_max = n;
1658 lda = a_rs;
1659 inca = a_cs;
1660 ldb = b_rs;
1661 incb = b_cs;
1663 }
1664 }
1665
1666 // Swap lda and inca if we're doing a transpose.
1667 if ( bl1_does_trans( trans ) )
1668 {
1670 }
1671
1672 // Extract conj component from trans parameter.
1674
1675 // Choose the loop based on whether n_elem will be shrinking or growing
1676 // with each iteration.
1678 {
1679 for ( j = 0; j < n_iter; j++ )
1680 {
1681 n_elem = n_elem_max - j;
1682 a_begin = a + j*lda + j*inca;
1683 b_begin = b + j*ldb + j*incb;
1684
1686 n_elem,
1687 a_begin, inca,
1688 b_begin, incb );
1689 }
1690 }
1691 else // if ( n_elem_is_ascending )
1692 {
1693 for ( j = 0; j < n_iter; j++ )
1694 {
1695 n_elem = bl1_min( j + 1, n_elem_max );
1696 a_begin = a + j*lda;
1697 b_begin = b + j*ldb;
1698
1700 n_elem,
1701 a_begin, inca,
1702 b_begin, incb );
1703 }
1704 }
1705}

References bl1_czcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_czcopymt()

void bl1_czcopymt ( trans1_t  trans,
int  m,
int  n,
scomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
1235{
1238 int lda, inca;
1239 int ldb, incb;
1240 int n_iter;
1241 int n_elem;
1242 int j;
1243 conj1_t conj;
1244
1245 // Return early if possible.
1246 if ( bl1_zero_dim2( m, n ) ) return;
1247
1248 // Handle cases where A and B are vectors to ensure that the underlying copy
1249 // gets invoked only once.
1250 if ( bl1_is_vector( m, n ) )
1251 {
1252 // Initialize with values appropriate for vectors.
1253 n_iter = 1;
1254 n_elem = bl1_vector_dim( m, n );
1255 lda = 1; // multiplied by zero when n_iter == 1; not needed.
1256 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1257 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1259 }
1260 else // matrix case
1261 {
1262 // Initialize with optimal values for column-major storage of B.
1263 n_iter = n;
1264 n_elem = m;
1265 lda = a_cs;
1266 inca = a_rs;
1267 ldb = b_cs;
1268 incb = b_rs;
1269
1270 // Handle the transposition of A.
1271 if ( bl1_does_trans( trans ) )
1272 {
1274 }
1275
1276 // An optimization: if B is row-major, then let's access the matrix by rows
1277 // instead of by columns for increased spatial locality.
1278 if ( bl1_is_row_storage( b_rs, b_cs ) )
1279 {
1283 }
1284 }
1285
1286 // Extract conj component from trans parameter.
1288
1289 for ( j = 0; j < n_iter; ++j )
1290 {
1291 a_begin = a + j*lda;
1292 b_begin = b + j*ldb;
1293
1295 n_elem,
1296 a_begin, inca,
1297 b_begin, incb );
1298 }
1299}

References bl1_czcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_czcopyv()

void bl1_czcopyv ( conj1_t  conj,
int  m,
scomplex * x,
int  incx,
dcomplex * y,
int  incy 
)
305{
306 scomplex* chi;
307 dcomplex* psi;
308 int i;
309
310 // Return early if possible.
311 if ( bl1_zero_dim1( m ) ) return;
312
313 // Initialize pointers.
314 chi = x;
315 psi = y;
316
317 for ( i = 0; i < m; ++i )
318 {
319 psi->real = chi->real;
320 psi->imag = chi->imag;
321
322 chi += incx;
323 psi += incy;
324 }
325
326 if ( bl1_is_conj( conj ) )
327 bl1_zconjv( m,
328 y, incy );
329}
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition bl1_conjv.c:34
double real
Definition blis_type_defs.h:139

References bl1_is_conj(), bl1_zconjv(), bl1_zero_dim1(), i, scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.

Referenced by bl1_czcopymr(), bl1_czcopymrt(), and bl1_czcopymt().

◆ bl1_damax()

void bl1_damax ( int  n,
double * x,
int  incx,
int * index 
)
25{
26#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
27 *index = cblas_idamax( n,
28 x, incx );
29#else
30 *index = F77_idamax( &n,
31 x, &incx ) - 1;
32#endif
33}
int F77_idamax(int *n, double *x, int *incx)
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX)

References cblas_idamax(), and F77_idamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), and FLA_SA_LU_unb().

◆ bl1_dasum()

void bl1_dasum ( int  n,
double * x,
int  incx,
double * norm 
)
25{
26#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
27 *norm = cblas_dasum( n,
28 x, incx );
29#else
30 *norm = F77_dasum( &n,
31 x, &incx );
32#endif
33}
double F77_dasum(int *n, double *x, int *incx)
double cblas_dasum(const int N, const double *X, const int incX)

References cblas_dasum(), and F77_dasum().

Referenced by FLA_Asum_external().

◆ bl1_daxpy()

void bl1_daxpy ( int  n,
double * alpha,
double * x,
int  incx,
double * y,
int  incy 
)
29{
30#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
31 cblas_daxpy( n,
32 *alpha,
33 x, incx,
34 y, incy );
35#else
36 F77_daxpy( &n,
37 alpha,
38 x, &incx,
39 y, &incy );
40#endif
41}
void F77_daxpy(int *n, double *alpha, double *x, int *incx, double *y, int *incy)
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY)

References cblas_daxpy(), and F77_daxpy().

Referenced by bl1_daxpymt(), bl1_daxpysmt(), bl1_daxpysv(), and bl1_daxpyv().

◆ bl1_daxpymrt()

void bl1_daxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
121{
122 double* a_begin;
123 double* b_begin;
124 int lda, inca;
125 int ldb, incb;
126 int n_iter;
127 int n_elem;
128 int n_elem_max;
130 int j;
132
133 // Return early if possible.
134 if ( bl1_zero_dim2( m, n ) ) return;
135
136 // Initialize variables based on storage format of B and value of uplo.
137 if ( bl1_is_col_storage( b_rs, b_cs ) )
138 {
139 if ( bl1_is_lower( uplo ) )
140 {
141 n_iter = bl1_min( m, n );
142 n_elem_max = m;
143 lda = a_cs;
144 inca = a_rs;
145 ldb = b_cs;
146 incb = b_rs;
148 }
149 else // if ( bl1_is_upper( uplo ) )
150 {
151 n_iter = n;
152 n_elem_max = bl1_min( m, n );
153 lda = a_cs;
154 inca = a_rs;
155 ldb = b_cs;
156 incb = b_rs;
158 }
159 }
160 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
161 {
162 if ( bl1_is_lower( uplo ) )
163 {
164 n_iter = m;
165 n_elem_max = bl1_min( m, n );
166 lda = a_rs;
167 inca = a_cs;
168 ldb = b_rs;
169 incb = b_cs;
171 }
172 else // if ( bl1_is_upper( uplo ) )
173 {
174 n_iter = bl1_min( m, n );
175 n_elem_max = n;
176 lda = a_rs;
177 inca = a_cs;
178 ldb = b_rs;
179 incb = b_cs;
181 }
182 }
183
184 // Swap lda and inca if we're doing a transpose.
185 if ( bl1_does_trans( trans ) )
186 {
188 }
189
190 // Extract conj component from trans parameter.
192
193 // Choose the loop based on whether n_elem will be shrinking or growing
194 // with each iteration.
196 {
197 for ( j = 0; j < n_iter; j++ )
198 {
199 n_elem = n_elem_max - j;
200 a_begin = a + j*lda + j*inca;
201 b_begin = b + j*ldb + j*incb;
202
204 n_elem,
205 alpha,
206 a_begin, inca,
207 b_begin, incb );
208 }
209 }
210 else // if ( n_elem_is_ascending )
211 {
212 for ( j = 0; j < n_iter; j++ )
213 {
214 n_elem = bl1_min( j + 1, n_elem_max );
215 a_begin = a + j*lda;
216 b_begin = b + j*ldb;
217
219 n_elem,
220 alpha,
221 a_begin, inca,
222 b_begin, incb );
223 }
224 }
225}
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21

References bl1_daxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Axpyrt_external().

◆ bl1_daxpymt()

void bl1_daxpymt ( trans1_t  trans,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
82{
83 double* a_begin;
84 double* b_begin;
85 int lda, inca;
86 int ldb, incb;
87 int n_iter;
88 int n_elem;
89 int j;
90
91 // Return early if possible.
92 if ( bl1_zero_dim2( m, n ) ) return;
93
94 // Handle cases where A and B are vectors to ensure that the underlying axpy
95 // gets invoked only once.
96 if ( bl1_is_vector( m, n ) )
97 {
98 // Initialize with values appropriate for vectors.
99 n_iter = 1;
100 n_elem = bl1_vector_dim( m, n );
101 lda = 1; // multiplied by zero when n_iter == 1; not needed.
102 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
103 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
105 }
106 else // matrix case
107 {
108 // Initialize with optimal values for column-major storage.
109 n_iter = n;
110 n_elem = m;
111 lda = a_cs;
112 inca = a_rs;
113 ldb = b_cs;
114 incb = b_rs;
115
116 // Handle the transposition of A.
117 if ( bl1_does_trans( trans ) )
118 {
120 }
121
122 // An optimization: if B is row-major and if A is effectively row-major
123 // after a possible transposition, then let's access the matrices by rows
124 // instead of by columns for increased spatial locality.
125 if ( bl1_is_row_storage( b_rs, b_cs ) )
126 {
127 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
129 {
133 }
134 }
135 }
136
137 for ( j = 0; j < n_iter; j++ )
138 {
139 a_begin = a + j*lda;
140 b_begin = b + j*ldb;
141
143 alpha,
144 a_begin, inca,
145 b_begin, incb );
146 }
147}
void bl1_daxpy(int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpy.c:28

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_daxpysmt()

void bl1_daxpysmt ( trans1_t  trans,
int  m,
int  n,
double * alpha0,
double * alpha1,
double * a,
int  a_rs,
int  a_cs,
double * beta,
double * b,
int  b_rs,
int  b_cs 
)
89{
90 double* a_begin;
91 double* b_begin;
92 double alpha_prod;
93 int lda, inca;
94 int ldb, incb;
95 int n_iter;
96 int n_elem;
97 int j;
98
99 // Return early if possible.
100 if ( bl1_zero_dim2( m, n ) ) return;
101
102 alpha_prod = (*alpha0) * (*alpha1);
103
104 // Handle cases where A and B are vectors to ensure that the underlying axpy
105 // gets invoked only once.
106 if ( bl1_is_vector( m, n ) )
107 {
108 // Initialize with values appropriate for vectors.
109 n_iter = 1;
110 n_elem = bl1_vector_dim( m, n );
111 lda = 1; // multiplied by zero when n_iter == 1; not needed.
112 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
113 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
115 }
116 else // matrix case
117 {
118 // Initialize with optimal values for column-major storage.
119 n_iter = n;
120 n_elem = m;
121 lda = a_cs;
122 inca = a_rs;
123 ldb = b_cs;
124 incb = b_rs;
125
126 // Handle the transposition of A.
127 if ( bl1_does_trans( trans ) )
128 {
130 }
131
132 // An optimization: if B is row-major and if A is effectively row-major
133 // after a possible transposition, then let's access the matrices by rows
134 // instead of by columns for increased spatial locality.
135 if ( bl1_is_row_storage( b_rs, b_cs ) )
136 {
137 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
139 {
143 }
144 }
145 }
146
147 for ( j = 0; j < n_iter; j++ )
148 {
149 a_begin = a + j*lda;
150 b_begin = b + j*ldb;
151
153 beta,
154 b_begin, incb );
155
157 &alpha_prod,
158 a_begin, inca,
159 b_begin, incb );
160 }
161}
void bl1_dscal(int n, double *alpha, double *x, int incx)
Definition bl1_scal.c:26

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_dscal(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_daxpysv()

void bl1_daxpysv ( int  n,
double * alpha0,
double * alpha1,
double * x,
int  incx,
double * beta,
double * y,
int  incy 
)
33{
34 double alpha_prod;
35
36 // Return early if possible.
37 if ( bl1_zero_dim1( n ) ) return;
38
39 alpha_prod = (*alpha0) * (*alpha1);
40
41 bl1_dscal( n,
42 beta,
43 y, incy );
44
45 bl1_daxpy( n,
46 &alpha_prod,
47 x, incx,
48 y, incy );
49}

References bl1_daxpy(), bl1_dscal(), and bl1_zero_dim1().

Referenced by FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

◆ bl1_daxpyv()

void bl1_daxpyv ( conj1_t  conj,
int  n,
double * alpha,
double * x,
int  incx,
double * y,
int  incy 
)

◆ bl1_dccopymr()

void bl1_dccopymr ( uplo1_t  uplo,
int  m,
int  n,
double * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
791{
792 double* a_begin;
794 int lda, inca;
795 int ldb, incb;
796 int n_iter;
797 int n_elem_max;
798 int n_elem;
799 int j;
800
801 // Return early if possible.
802 if ( bl1_zero_dim2( m, n ) ) return;
803
804 // We initialize for column-major.
805 n_iter = n;
806 n_elem_max = m;
807 lda = a_cs;
808 inca = a_rs;
809 ldb = b_cs;
810 incb = b_rs;
811
812 // An optimization: if B is row-major, then let's access the matrix
813 // by rows instead of by columns for increased spatial locality.
814 if ( bl1_is_row_storage( b_rs, b_cs ) )
815 {
819 bl1_toggle_uplo( uplo );
820 }
821
822
823 if ( bl1_is_upper( uplo ) )
824 {
825 for ( j = 0; j < n_iter; j++ )
826 {
827 n_elem = bl1_min( j + 1, n_elem_max );
828 a_begin = a + j*lda;
829 b_begin = b + j*ldb;
830
832 n_elem,
833 a_begin, inca,
834 b_begin, incb );
835 }
836 }
837 else // if ( bl1_is_lower( uplo ) )
838 {
839 for ( j = 0; j < n_iter; j++ )
840 {
841 n_elem = bl1_max( 0, n_elem_max - j );
842 a_begin = a + j*lda + j*inca;
843 b_begin = b + j*ldb + j*incb;
844
845 if ( n_elem <= 0 ) break;
846
848 n_elem,
849 a_begin, inca,
850 b_begin, incb );
851 }
852 }
853}
void bl1_dccopyv(conj1_t conj, int m, double *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:214

References bl1_dccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_dccopymrt()

void bl1_dccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1073{
1074 double* a_begin;
1076 int lda, inca;
1077 int ldb, incb;
1078 int n_iter;
1079 int n_elem;
1080 int n_elem_max;
1082 int j;
1083 conj1_t conj;
1084
1085 // Return early if possible.
1086 if ( bl1_zero_dim2( m, n ) ) return;
1087
1088 // Initialize variables based on storage format of B and value of uplo.
1089 if ( bl1_is_col_storage( b_rs, b_cs ) )
1090 {
1091 if ( bl1_is_lower( uplo ) )
1092 {
1093 n_iter = bl1_min( m, n );
1094 n_elem_max = m;
1095 lda = a_cs;
1096 inca = a_rs;
1097 ldb = b_cs;
1098 incb = b_rs;
1100 }
1101 else // if ( bl1_is_upper( uplo ) )
1102 {
1103 n_iter = n;
1104 n_elem_max = bl1_min( m, n );
1105 lda = a_cs;
1106 inca = a_rs;
1107 ldb = b_cs;
1108 incb = b_rs;
1110 }
1111 }
1112 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1113 {
1114 if ( bl1_is_lower( uplo ) )
1115 {
1116 n_iter = m;
1117 n_elem_max = bl1_min( m, n );
1118 lda = a_rs;
1119 inca = a_cs;
1120 ldb = b_rs;
1121 incb = b_cs;
1123 }
1124 else // if ( bl1_is_upper( uplo ) )
1125 {
1126 n_iter = bl1_min( m, n );
1127 n_elem_max = n;
1128 lda = a_rs;
1129 inca = a_cs;
1130 ldb = b_rs;
1131 incb = b_cs;
1133 }
1134 }
1135
1136 // Swap lda and inca if we're doing a transpose.
1137 if ( bl1_does_trans( trans ) )
1138 {
1140 }
1141
1142 // Extract conj component from trans parameter.
1144
1145 // Choose the loop based on whether n_elem will be shrinking or growing
1146 // with each iteration.
1148 {
1149 for ( j = 0; j < n_iter; j++ )
1150 {
1151 n_elem = n_elem_max - j;
1152 a_begin = a + j*lda + j*inca;
1153 b_begin = b + j*ldb + j*incb;
1154
1156 n_elem,
1157 a_begin, inca,
1158 b_begin, incb );
1159 }
1160 }
1161 else // if ( n_elem_is_ascending )
1162 {
1163 for ( j = 0; j < n_iter; j++ )
1164 {
1165 n_elem = bl1_min( j + 1, n_elem_max );
1166 a_begin = a + j*lda;
1167 b_begin = b + j*ldb;
1168
1170 n_elem,
1171 a_begin, inca,
1172 b_begin, incb );
1173 }
1174 }
1175}

References bl1_dccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_dccopymt()

void bl1_dccopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
899{
900 double* a_begin;
902 int lda, inca;
903 int ldb, incb;
904 int n_iter;
905 int n_elem;
906 int j;
908
909 // Return early if possible.
910 if ( bl1_zero_dim2( m, n ) ) return;
911
912 // Handle cases where A and B are vectors to ensure that the underlying copy
913 // gets invoked only once.
914 if ( bl1_is_vector( m, n ) )
915 {
916 // Initialize with values appropriate for vectors.
917 n_iter = 1;
918 n_elem = bl1_vector_dim( m, n );
919 lda = 1; // multiplied by zero when n_iter == 1; not needed.
920 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
921 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
923 }
924 else // matrix case
925 {
926 // Initialize with optimal values for column-major storage of B.
927 n_iter = n;
928 n_elem = m;
929 lda = a_cs;
930 inca = a_rs;
931 ldb = b_cs;
932 incb = b_rs;
933
934 // Handle the transposition of A.
935 if ( bl1_does_trans( trans ) )
936 {
938 }
939
940 // An optimization: if B is row-major, then let's access the matrix by rows
941 // instead of by columns for increased spatial locality.
942 if ( bl1_is_row_storage( b_rs, b_cs ) )
943 {
947 }
948 }
949
950 // Extract conj component from trans parameter.
952
953 for ( j = 0; j < n_iter; ++j )
954 {
955 a_begin = a + j*lda;
956 b_begin = b + j*ldb;
957
959 n_elem,
960 a_begin, inca,
961 b_begin, incb );
962 }
963}

References bl1_dccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_dccopyv()

void bl1_dccopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
scomplex *  y,
int  incy 
)
215{
216 double* chi;
217 scomplex* psi;
218 int i;
219
220 // Return early if possible.
221 if ( bl1_zero_dim1( m ) ) return;
222
223 // Initialize pointers.
224 chi = x;
225 psi = y;
226
227 for ( i = 0; i < m; ++i )
228 {
229 psi->real = *chi;
230 psi->imag = 0.0F;
231
232 chi += incx;
233 psi += incy;
234 }
235}

References bl1_zero_dim1(), i, scomplex::imag, and scomplex::real.

Referenced by bl1_dccopymr(), bl1_dccopymrt(), and bl1_dccopymt().

◆ bl1_dconjm()

void bl1_dconjm ( int  m,
int  n,
double a,
int  a_rs,
int  a_cs 
)
19{
20 return;
21}

◆ bl1_dconjmr()

void bl1_dconjmr ( uplo1_t  uplo,
int  m,
int  n,
double a,
int  a_rs,
int  a_cs 
)
19{
20 return;
21}

◆ bl1_dconjv()

void bl1_dconjv ( int  m,
double x,
int  incx 
)

◆ bl1_dcopy()

void bl1_dcopy ( int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)
27{
28#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
29 cblas_dcopy( m,
30 x, incx,
31 y, incy );
32#else
33 F77_dcopy( &m,
34 x, &incx,
35 y, &incy );
36#endif
37}
void F77_dcopy(int *n, double *x, int *incx, double *y, int *incy)
void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY)

References cblas_dcopy(), and F77_dcopy().

Referenced by bl1_dcopymr(), bl1_dcopymt(), bl1_dcopyv(), FLA_Obj_extract_imag_part(), FLA_Obj_extract_real_part(), and FLA_SA_LU_unb().

◆ bl1_dcopymr()

void bl1_dcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
77{
78 double* a_begin;
79 double* b_begin;
80 int lda, inca;
81 int ldb, incb;
82 int n_iter;
83 int n_elem_max;
84 int n_elem;
85 int j;
86
87 // Return early if possible.
88 if ( bl1_zero_dim2( m, n ) ) return;
89
90 // We initialize for column-major.
91 n_iter = n;
92 n_elem_max = m;
93 lda = a_cs;
94 inca = a_rs;
95 ldb = b_cs;
96 incb = b_rs;
97
98 // An optimization: if A and B are both row-major, then let's access the
99 // matrices by rows instead of by columns for increased spatial locality.
101 {
105 bl1_toggle_uplo( uplo );
106 }
107
108
109 if ( bl1_is_upper( uplo ) )
110 {
111 for ( j = 0; j < n_iter; j++ )
112 {
113 n_elem = bl1_min( j + 1, n_elem_max );
114 a_begin = a + j*lda;
115 b_begin = b + j*ldb;
116
118 a_begin, inca,
119 b_begin, incb );
120 }
121 }
122 else // if ( bl1_is_lower( uplo ) )
123 {
124 for ( j = 0; j < n_iter; j++ )
125 {
126 n_elem = bl1_max( 0, n_elem_max - j );
127 a_begin = a + j*lda + j*inca;
128 b_begin = b + j*ldb + j*incb;
129
130 if ( n_elem <= 0 ) break;
131
133 a_begin, inca,
134 b_begin, incb );
135 }
136 }
137}
void bl1_dcopy(int m, double *x, int incx, double *y, int incy)
Definition bl1_copy.c:26

References bl1_dcopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_dcreate_contigmr(), bl1_dfree_saved_contigmr(), and FLA_Copyr_external().

◆ bl1_dcopymrt()

void bl1_dcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
119{
120 double* a_begin;
121 double* b_begin;
122 int lda, inca;
123 int ldb, incb;
124 int n_iter;
125 int n_elem;
126 int n_elem_max;
128 int j;
130
131 // Return early if possible.
132 if ( bl1_zero_dim2( m, n ) ) return;
133
134 // Initialize variables based on storage format of B and value of uplo.
135 if ( bl1_is_col_storage( b_rs, b_cs ) )
136 {
137 if ( bl1_is_lower( uplo ) )
138 {
139 n_iter = bl1_min( m, n );
140 n_elem_max = m;
141 lda = a_cs;
142 inca = a_rs;
143 ldb = b_cs;
144 incb = b_rs;
146 }
147 else // if ( bl1_is_upper( uplo ) )
148 {
149 n_iter = n;
150 n_elem_max = bl1_min( m, n );
151 lda = a_cs;
152 inca = a_rs;
153 ldb = b_cs;
154 incb = b_rs;
156 }
157 }
158 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
159 {
160 if ( bl1_is_lower( uplo ) )
161 {
162 n_iter = m;
163 n_elem_max = bl1_min( m, n );
164 lda = a_rs;
165 inca = a_cs;
166 ldb = b_rs;
167 incb = b_cs;
169 }
170 else // if ( bl1_is_upper( uplo ) )
171 {
172 n_iter = bl1_min( m, n );
173 n_elem_max = n;
174 lda = a_rs;
175 inca = a_cs;
176 ldb = b_rs;
177 incb = b_cs;
179 }
180 }
181
182 // Swap lda and inca if we're doing a transpose.
183 if ( bl1_does_trans( trans ) )
184 {
186 }
187
188 // Extract conj component from trans parameter.
190
191 // Choose the loop based on whether n_elem will be shrinking or growing
192 // with each iteration.
194 {
195 for ( j = 0; j < n_iter; j++ )
196 {
197 n_elem = n_elem_max - j;
198 a_begin = a + j*lda + j*inca;
199 b_begin = b + j*ldb + j*incb;
200
202 n_elem,
203 a_begin, inca,
204 b_begin, incb );
205 }
206 }
207 else // if ( n_elem_is_ascending )
208 {
209 for ( j = 0; j < n_iter; j++ )
210 {
211 n_elem = bl1_min( j + 1, n_elem_max );
212 a_begin = a + j*lda;
213 b_begin = b + j*ldb;
214
216 n_elem,
217 a_begin, inca,
218 b_begin, incb );
219 }
220 }
221}
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42

References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

◆ bl1_dcopymt()

void bl1_dcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
149{
150 double* a_begin;
151 double* b_begin;
152 int lda, inca;
153 int ldb, incb;
154 int n_iter;
155 int n_elem;
156 int j;
157
158 // Return early if possible.
159 if ( bl1_zero_dim2( m, n ) ) return;
160
161 // Handle cases where A and B are vectors to ensure that the underlying copy
162 // gets invoked only once.
163 if ( bl1_is_vector( m, n ) )
164 {
165 // Initialize with values appropriate for vectors.
166 n_iter = 1;
167 n_elem = bl1_vector_dim( m, n );
168 lda = 1; // multiplied by zero when n_iter == 1; not needed.
169 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
170 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
172 }
173 else // matrix case
174 {
175 // Initialize with optimal values for column-major storage.
176 n_iter = n;
177 n_elem = m;
178 lda = a_cs;
179 inca = a_rs;
180 ldb = b_cs;
181 incb = b_rs;
182
183 // Handle the transposition of A.
184 if ( bl1_does_trans( trans ) )
185 {
187 }
188
189 // An optimization: if B is row-major and if A is effectively row-major
190 // after a possible transposition, then let's access the matrix by rows
191 // instead of by columns for increased spatial locality.
192 if ( bl1_is_row_storage( b_rs, b_cs ) )
193 {
194 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
196 {
200 }
201 }
202 }
203
204 for ( j = 0; j < n_iter; j++ )
205 {
206 a_begin = a + j*lda;
207 b_begin = b + j*ldb;
208
210 a_begin, inca,
211 b_begin, incb );
212 }
213}

References bl1_dcopy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree_saved_contigm(), bl1_dfree_saved_contigmsr(), bl1_dsymm(), bl1_dsyr2k(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Bsvd_v_opd_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opd_var2().

◆ bl1_dcopyv()

void bl1_dcopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)

◆ bl1_ddcopymr()

void bl1_ddcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
725{
726 double* a_begin;
727 double* b_begin;
728 int lda, inca;
729 int ldb, incb;
730 int n_iter;
731 int n_elem_max;
732 int n_elem;
733 int j;
734
735 // Return early if possible.
736 if ( bl1_zero_dim2( m, n ) ) return;
737
738 // We initialize for column-major.
739 n_iter = n;
740 n_elem_max = m;
741 lda = a_cs;
742 inca = a_rs;
743 ldb = b_cs;
744 incb = b_rs;
745
746 // An optimization: if B is row-major, then let's access the matrix
747 // by rows instead of by columns for increased spatial locality.
748 if ( bl1_is_row_storage( b_rs, b_cs ) )
749 {
753 bl1_toggle_uplo( uplo );
754 }
755
756
757 if ( bl1_is_upper( uplo ) )
758 {
759 for ( j = 0; j < n_iter; j++ )
760 {
761 n_elem = bl1_min( j + 1, n_elem_max );
762 a_begin = a + j*lda;
763 b_begin = b + j*ldb;
764
766 n_elem,
767 a_begin, inca,
768 b_begin, incb );
769 }
770 }
771 else // if ( bl1_is_lower( uplo ) )
772 {
773 for ( j = 0; j < n_iter; j++ )
774 {
775 n_elem = bl1_max( 0, n_elem_max - j );
776 a_begin = a + j*lda + j*inca;
777 b_begin = b + j*ldb + j*incb;
778
779 if ( n_elem <= 0 ) break;
780
782 n_elem,
783 a_begin, inca,
784 b_begin, incb );
785 }
786 }
787}

References bl1_dcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_ddcopymrt()

void bl1_ddcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
967{
968 double* a_begin;
969 double* b_begin;
970 int lda, inca;
971 int ldb, incb;
972 int n_iter;
973 int n_elem;
974 int n_elem_max;
976 int j;
978
979 // Return early if possible.
980 if ( bl1_zero_dim2( m, n ) ) return;
981
982 // Initialize variables based on storage format of B and value of uplo.
983 if ( bl1_is_col_storage( b_rs, b_cs ) )
984 {
985 if ( bl1_is_lower( uplo ) )
986 {
987 n_iter = bl1_min( m, n );
988 n_elem_max = m;
989 lda = a_cs;
990 inca = a_rs;
991 ldb = b_cs;
992 incb = b_rs;
994 }
995 else // if ( bl1_is_upper( uplo ) )
996 {
997 n_iter = n;
998 n_elem_max = bl1_min( m, n );
999 lda = a_cs;
1000 inca = a_rs;
1001 ldb = b_cs;
1002 incb = b_rs;
1004 }
1005 }
1006 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1007 {
1008 if ( bl1_is_lower( uplo ) )
1009 {
1010 n_iter = m;
1011 n_elem_max = bl1_min( m, n );
1012 lda = a_rs;
1013 inca = a_cs;
1014 ldb = b_rs;
1015 incb = b_cs;
1017 }
1018 else // if ( bl1_is_upper( uplo ) )
1019 {
1020 n_iter = bl1_min( m, n );
1021 n_elem_max = n;
1022 lda = a_rs;
1023 inca = a_cs;
1024 ldb = b_rs;
1025 incb = b_cs;
1027 }
1028 }
1029
1030 // Swap lda and inca if we're doing a transpose.
1031 if ( bl1_does_trans( trans ) )
1032 {
1034 }
1035
1036 // Extract conj component from trans parameter.
1038
1039 // Choose the loop based on whether n_elem will be shrinking or growing
1040 // with each iteration.
1042 {
1043 for ( j = 0; j < n_iter; j++ )
1044 {
1045 n_elem = n_elem_max - j;
1046 a_begin = a + j*lda + j*inca;
1047 b_begin = b + j*ldb + j*incb;
1048
1050 n_elem,
1051 a_begin, inca,
1052 b_begin, incb );
1053 }
1054 }
1055 else // if ( n_elem_is_ascending )
1056 {
1057 for ( j = 0; j < n_iter; j++ )
1058 {
1059 n_elem = bl1_min( j + 1, n_elem_max );
1060 a_begin = a + j*lda;
1061 b_begin = b + j*ldb;
1062
1064 n_elem,
1065 a_begin, inca,
1066 b_begin, incb );
1067 }
1068 }
1069}

References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

◆ bl1_ddcopymt()

void bl1_ddcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
831{
832 double* a_begin;
833 double* b_begin;
834 int lda, inca;
835 int ldb, incb;
836 int n_iter;
837 int n_elem;
838 int j;
840
841 // Return early if possible.
842 if ( bl1_zero_dim2( m, n ) ) return;
843
844 // Handle cases where A and B are vectors to ensure that the underlying copy
845 // gets invoked only once.
846 if ( bl1_is_vector( m, n ) )
847 {
848 // Initialize with values appropriate for vectors.
849 n_iter = 1;
850 n_elem = bl1_vector_dim( m, n );
851 lda = 1; // multiplied by zero when n_iter == 1; not needed.
852 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
853 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
855 }
856 else // matrix case
857 {
858 // Initialize with optimal values for column-major storage of B.
859 n_iter = n;
860 n_elem = m;
861 lda = a_cs;
862 inca = a_rs;
863 ldb = b_cs;
864 incb = b_rs;
865
866 // Handle the transposition of A.
867 if ( bl1_does_trans( trans ) )
868 {
870 }
871
872 // An optimization: if B is row-major, then let's access the matrix by rows
873 // instead of by columns for increased spatial locality.
874 if ( bl1_is_row_storage( b_rs, b_cs ) )
875 {
879 }
880 }
881
882 // Extract conj component from trans parameter.
884
885 for ( j = 0; j < n_iter; ++j )
886 {
887 a_begin = a + j*lda;
888 b_begin = b + j*ldb;
889
891 n_elem,
892 a_begin, inca,
893 b_begin, incb );
894 }
895}

References bl1_dcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

◆ bl1_ddot()

void bl1_ddot ( conj1_t  conj,
int  n,
double x,
int  incx,
double y,
int  incy,
double rho 
)

◆ bl1_ddot2s()

void bl1_ddot2s ( conj1_t  conj,
int  n,
double alpha,
double x,
int  incx,
double y,
int  incy,
double beta,
double rho 
)

◆ bl1_ddots()

void bl1_ddots ( conj1_t  conj,
int  n,
double alpha,
double x,
int  incx,
double y,
int  incy,
double beta,
double rho 
)

◆ bl1_dfnorm()

void bl1_dfnorm ( int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  norm 
)
68{
69 double* a_ij;
70 double sum;
71 int lda, inca;
72 int n_iter;
73 int n_elem;
74 int i, j;
75
76 // Return early if possible.
77 if ( bl1_zero_dim2( m, n ) ) return;
78
79 // Handle cases where A is a vector separately.
80 if ( bl1_is_vector( m, n ) )
81 {
82 // Initialize with values appropriate for vectors.
83 n_iter = 1;
84 n_elem = bl1_vector_dim( m, n );
85 lda = 1; // multiplied by zero when n_iter == 1; not needed.
87 }
88 else // matrix case
89 {
90 // Initialize with optimal values for column-major storage.
91 n_iter = n;
92 n_elem = m;
93 lda = a_cs;
94 inca = a_rs;
95
96 // An optimization: if A is row-major, then let's access the matrix by
97 // rows instead of by columns for increased spatial locality.
99 {
102 }
103 }
104
105 // Initialize the accumulator variable.
106 sum = 0.0;
107
108 for ( j = 0; j < n_iter; j++ )
109 {
110 for ( i = 0; i < n_elem; i++ )
111 {
112 a_ij = a + i*inca + j*lda;
113 sum += (*a_ij) * (*a_ij);
114 }
115 }
116
117 // Compute the norm and store the result.
118 *norm = sqrt( sum );
119}

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, and i.

Referenced by FLA_Norm_frob().

◆ bl1_dinvscalm()

void bl1_dinvscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)
66{
67 double alpha_inv;
68 double* a_begin;
69 int lda, inca;
70 int n_iter;
71 int n_elem;
72 int j;
73
74 // Return early if possible.
75 if ( bl1_zero_dim2( m, n ) ) return;
76 if ( bl1_deq1( alpha ) ) return;
77
78 // Handle cases where A is a vector to ensure that the underlying axpy
79 // gets invoked only once.
80 if ( bl1_is_vector( m, n ) )
81 {
82 // Initialize with values appropriate for a vector.
83 n_iter = 1;
84 n_elem = bl1_vector_dim( m, n );
85 lda = 1; // multiplied by zero when n_iter == 1; not needed.
87 }
88 else // matrix case
89 {
90 // Initialize with optimal values for column-major storage.
91 n_iter = n;
92 n_elem = m;
93 lda = a_cs;
94 inca = a_rs;
95
96 // An optimization: if A is row-major, then let's access the matrix
97 // by rows instead of by columns to increase spatial locality.
99 {
102 }
103 }
104
106
107 for ( j = 0; j < n_iter; j++ )
108 {
109 a_begin = a + j*lda;
110
112 &alpha_inv,
113 a_begin, inca );
114 }
115}
void bl1_dinvert2s(conj1_t conj, double *alpha, double *beta)
Definition bl1_invert2s.c:20

References bl1_dinvert2s(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_dinvscalv()

void bl1_dinvscalv ( conj1_t  conj,
int  n,
double alpha,
double x,
int  incx 
)

◆ bl1_dnrm2()

void bl1_dnrm2 ( int  n,
double *  x,
int  incx,
double *  norm 
)
25{
26#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
27 *norm = cblas_dnrm2( n,
28 x, incx );
29#else
30 *norm = F77_dnrm2( &n,
31 x, &incx );
32#endif
33}
double F77_dnrm2(int *n, double *x, int *incx)
double cblas_dnrm2(const int N, const double *X, const int incX)

References cblas_dnrm2(), and F77_dnrm2().

Referenced by FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_l_opd(), FLA_Househ3UD_UT_opd(), and FLA_Nrm2_external().

◆ bl1_dscal()

void bl1_dscal ( int  n,
double *  alpha,
double *  x,
int  incx 
)
27{
28#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
29 cblas_dscal( n,
30 *alpha,
31 x, incx );
32#else
33 F77_dscal( &n,
34 alpha,
35 x, &incx );
36#endif
37}
void F77_dscal(int *n, double *alpha, double *y, int *incy)
void cblas_dscal(const int N, const double alpha, double *X, const int incX)

References cblas_dscal(), and F77_dscal().

Referenced by bl1_daxpysmt(), bl1_daxpysv(), bl1_dinvscalm(), bl1_dinvscalv(), bl1_dscalm(), bl1_dscalmr(), bl1_dscalv(), bl1_zconjm(), bl1_zconjmr(), bl1_zconjv(), and FLA_SA_LU_unb().

◆ bl1_dscalm()

void bl1_dscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)
66{
67 double alpha_conj;
68 double* a_begin;
69 int lda, inca;
70 int n_iter;
71 int n_elem;
72 int j;
73
74 // Return early if possible.
75 if ( bl1_zero_dim2( m, n ) ) return;
76 if ( bl1_deq1( alpha ) ) return;
77
78 // Handle cases where A is a vector to ensure that the underlying axpy
79 // gets invoked only once.
80 if ( bl1_is_vector( m, n ) )
81 {
82 // Initialize with values appropriate for a vector.
83 n_iter = 1;
84 n_elem = bl1_vector_dim( m, n );
85 lda = 1; // multiplied by zero when n_iter == 1; not needed.
87 }
88 else // matrix case
89 {
90 // Initialize with optimal values for column-major storage.
91 n_iter = n;
92 n_elem = m;
93 lda = a_cs;
94 inca = a_rs;
95
96 // An optimization: if A is row-major, then let's access the matrix
97 // by rows instead of by columns to increase spatial locality.
99 {
102 }
103 }
104
106
107 for ( j = 0; j < n_iter; j++ )
108 {
109 a_begin = a + j*lda;
110
112 &alpha_conj,
113 a_begin, inca );
114 }
115}

References bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_Lyap_n_opd_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_dscalmr()

void bl1_dscalmr ( uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)
70{
71 double* a_begin;
72 int lda, inca;
73 int n_iter;
74 int n_elem_max;
75 int n_elem;
76 int j;
77
78 // Return early if possible.
79 if ( bl1_zero_dim2( m, n ) ) return;
80 if ( bl1_deq1( alpha ) ) return;
81
82 // We initialize for column-major.
83 n_iter = n;
84 n_elem_max = m;
85 lda = a_cs;
86 inca = a_rs;
87
88 // An optimization: if A is row-major, then let's access the matrix
89 // by rows instead of by columns to increase spatial locality.
91 {
94 bl1_toggle_uplo( uplo );
95 }
96
97 if ( bl1_is_upper( uplo ) )
98 {
99 for ( j = 0; j < n_iter; j++ )
100 {
101 n_elem = bl1_min( j + 1, n_elem_max );
102 a_begin = a + j*lda;
103
105 alpha,
106 a_begin, inca );
107 }
108 }
109 else // if ( bl1_is_lower( uplo ) )
110 {
111 for ( j = 0; j < n_iter; j++ )
112 {
113 n_elem = bl1_max( 0, n_elem_max - j );
114 a_begin = a + j*lda + j*inca;
115
116 if ( n_elem <= 0 ) break;
117
119 alpha,
120 a_begin, inca );
121 }
122 }
123}

References bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

◆ bl1_dscalv()

void bl1_dscalv ( conj1_t  conj,
int  n,
double alpha,
double x,
int  incx 
)

◆ bl1_dscopymr()

void bl1_dscopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
399{
400 double* a_begin;
401 float* b_begin;
402 int lda, inca;
403 int ldb, incb;
404 int n_iter;
405 int n_elem_max;
406 int n_elem;
407 int j;
408
409 // Return early if possible.
410 if ( bl1_zero_dim2( m, n ) ) return;
411
412 // We initialize for column-major.
413 n_iter = n;
414 n_elem_max = m;
415 lda = a_cs;
416 inca = a_rs;
417 ldb = b_cs;
418 incb = b_rs;
419
420 // An optimization: if B is row-major, then let's access the matrix
421 // by rows instead of by columns for increased spatial locality.
422 if ( bl1_is_row_storage( b_rs, b_cs ) )
423 {
427 bl1_toggle_uplo( uplo );
428 }
429
430
431 if ( bl1_is_upper( uplo ) )
432 {
433 for ( j = 0; j < n_iter; j++ )
434 {
435 n_elem = bl1_min( j + 1, n_elem_max );
436 a_begin = a + j*lda;
437 b_begin = b + j*ldb;
438
440 n_elem,
441 a_begin, inca,
442 b_begin, incb );
443 }
444 }
445 else // if ( bl1_is_lower( uplo ) )
446 {
447 for ( j = 0; j < n_iter; j++ )
448 {
449 n_elem = bl1_max( 0, n_elem_max - j );
450 a_begin = a + j*lda + j*inca;
451 b_begin = b + j*ldb + j*incb;
452
453 if ( n_elem <= 0 ) break;
454
456 n_elem,
457 a_begin, inca,
458 b_begin, incb );
459 }
460 }
461}
void bl1_dscopyv(conj1_t conj, int m, double *x, int incx, float *y, int incy)
Definition bl1_copyv.c:101

References bl1_dscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_dscopymrt()

void bl1_dscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
861{
862 double* a_begin;
863 float* b_begin;
864 int lda, inca;
865 int ldb, incb;
866 int n_iter;
867 int n_elem;
868 int n_elem_max;
870 int j;
872
873 // Return early if possible.
874 if ( bl1_zero_dim2( m, n ) ) return;
875
876 // Initialize variables based on storage format of B and value of uplo.
877 if ( bl1_is_col_storage( b_rs, b_cs ) )
878 {
879 if ( bl1_is_lower( uplo ) )
880 {
881 n_iter = bl1_min( m, n );
882 n_elem_max = m;
883 lda = a_cs;
884 inca = a_rs;
885 ldb = b_cs;
886 incb = b_rs;
888 }
889 else // if ( bl1_is_upper( uplo ) )
890 {
891 n_iter = n;
892 n_elem_max = bl1_min( m, n );
893 lda = a_cs;
894 inca = a_rs;
895 ldb = b_cs;
896 incb = b_rs;
898 }
899 }
900 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
901 {
902 if ( bl1_is_lower( uplo ) )
903 {
904 n_iter = m;
905 n_elem_max = bl1_min( m, n );
906 lda = a_rs;
907 inca = a_cs;
908 ldb = b_rs;
909 incb = b_cs;
911 }
912 else // if ( bl1_is_upper( uplo ) )
913 {
914 n_iter = bl1_min( m, n );
915 n_elem_max = n;
916 lda = a_rs;
917 inca = a_cs;
918 ldb = b_rs;
919 incb = b_cs;
921 }
922 }
923
924 // Swap lda and inca if we're doing a transpose.
925 if ( bl1_does_trans( trans ) )
926 {
928 }
929
930 // Extract conj component from trans parameter.
932
933 // Choose the loop based on whether n_elem will be shrinking or growing
934 // with each iteration.
936 {
937 for ( j = 0; j < n_iter; j++ )
938 {
939 n_elem = n_elem_max - j;
940 a_begin = a + j*lda + j*inca;
941 b_begin = b + j*ldb + j*incb;
942
944 n_elem,
945 a_begin, inca,
946 b_begin, incb );
947 }
948 }
949 else // if ( n_elem_is_ascending )
950 {
951 for ( j = 0; j < n_iter; j++ )
952 {
953 n_elem = bl1_min( j + 1, n_elem_max );
954 a_begin = a + j*lda;
955 b_begin = b + j*ldb;
956
958 n_elem,
959 a_begin, inca,
960 b_begin, incb );
961 }
962 }
963}

References bl1_does_trans(), bl1_dscopyv(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_dscopymt()

void bl1_dscopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
495{
496 double* a_begin;
497 float* b_begin;
498 int lda, inca;
499 int ldb, incb;
500 int n_iter;
501 int n_elem;
502 int j;
504
505 // Return early if possible.
506 if ( bl1_zero_dim2( m, n ) ) return;
507
508 // Handle cases where A and B are vectors to ensure that the underlying copy
509 // gets invoked only once.
510 if ( bl1_is_vector( m, n ) )
511 {
512 // Initialize with values appropriate for vectors.
513 n_iter = 1;
514 n_elem = bl1_vector_dim( m, n );
515 lda = 1; // multiplied by zero when n_iter == 1; not needed.
516 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
517 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
519 }
520 else // matrix case
521 {
522 // Initialize with optimal values for column-major storage of B.
523 n_iter = n;
524 n_elem = m;
525 lda = a_cs;
526 inca = a_rs;
527 ldb = b_cs;
528 incb = b_rs;
529
530 // Handle the transposition of A.
531 if ( bl1_does_trans( trans ) )
532 {
534 }
535
536 // An optimization: if B is row-major, then let's access the matrix by rows
537 // instead of by columns for increased spatial locality.
538 if ( bl1_is_row_storage( b_rs, b_cs ) )
539 {
543 }
544 }
545
546 // Extract conj component from trans parameter.
548
549 for ( j = 0; j < n_iter; ++j )
550 {
551 a_begin = a + j*lda;
552 b_begin = b + j*ldb;
553
555 n_elem,
556 a_begin, inca,
557 b_begin, incb );
558 }
559}

References bl1_does_trans(), bl1_dscopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_dscopyv()

void bl1_dscopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
float *  y,
int  incy 
)
102{
103 double* chi;
104 float* psi;
105 int i;
106
107 // Return early if possible.
108 if ( bl1_zero_dim1( m ) ) return;
109
110 // Initialize pointers.
111 chi = x;
112 psi = y;
113
114 for ( i = 0; i < m; ++i )
115 {
116 *psi = *chi;
117
118 chi += incx;
119 psi += incy;
120 }
121}

References bl1_zero_dim1(), and i.

Referenced by bl1_dscopymr(), bl1_dscopymrt(), and bl1_dscopymt().

◆ bl1_dswap()

void bl1_dswap ( int  n,
double *  x,
int  incx,
double *  y,
int  incy 
)
27{
28#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
29 cblas_dswap( n,
30 x, incx,
31 y, incy );
32#else
33 F77_dswap( &n,
34 x, &incx,
35 y, &incy );
36#endif
37}
void F77_dswap(int *n, double *x, int *incx, double *y, int *incy)
void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY)

References cblas_dswap(), and F77_dswap().

Referenced by bl1_dswapmt(), bl1_dswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_dswapmt()

void bl1_dswapmt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
81{
82 double* a_begin;
83 double* b_begin;
84 int lda, inca;
85 int ldb, incb;
86 int n_iter;
87 int n_elem;
88 int j;
89
90 // Return early if possible.
91 if ( bl1_zero_dim2( m, n ) ) return;
92
93 // Handle cases where A and B are vectors to ensure that the underlying copy
94 // gets invoked only once.
95 if ( bl1_is_vector( m, n ) )
96 {
97 // Initialize with values appropriate for vectors.
98 n_iter = 1;
99 n_elem = bl1_vector_dim( m, n );
100 lda = 1; // multiplied by zero when n_iter == 1; not needed.
101 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
102 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
104 }
105 else // matrix case
106 {
107 // Initialize with optimal values for column-major storage.
108 n_iter = n;
109 n_elem = m;
110 lda = a_cs;
111 inca = a_rs;
112 ldb = b_cs;
113 incb = b_rs;
114
115 // Handle the transposition of A.
116 if ( bl1_does_trans( trans ) )
117 {
119 }
120
121 // An optimization: if B is row-major and if A is effectively row-major
122 // after a possible transposition, then let's access the matrix by rows
123 // instead of by columns for increased spatial locality.
124 if ( bl1_is_row_storage( b_rs, b_cs ) )
125 {
126 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
128 {
132 }
133 }
134 }
135
136 for ( j = 0; j < n_iter; j++ )
137 {
138 a_begin = a + j*lda;
139 b_begin = b + j*ldb;
140
142 a_begin, inca,
143 b_begin, incb );
144 }
145}
void bl1_dswap(int n, double *x, int incx, double *y, int incy)
Definition bl1_swap.c:26

References bl1_does_notrans(), bl1_does_trans(), bl1_dswap(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_dswapv()

void bl1_dswapv ( int  n,
double * x,
int  incx,
double * y,
int  incy 
)

◆ bl1_dzcopymr()

void bl1_dzcopymr ( uplo1_t  uplo,
int  m,
int  n,
double * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
921{
922 double* a_begin;
924 int lda, inca;
925 int ldb, incb;
926 int n_iter;
927 int n_elem_max;
928 int n_elem;
929 int j;
930
931 // Return early if possible.
932 if ( bl1_zero_dim2( m, n ) ) return;
933
934 // We initialize for column-major.
935 n_iter = n;
936 n_elem_max = m;
937 lda = a_cs;
938 inca = a_rs;
939 ldb = b_cs;
940 incb = b_rs;
941
942 // An optimization: if B is row-major, then let's access the matrix
943 // by rows instead of by columns for increased spatial locality.
944 if ( bl1_is_row_storage( b_rs, b_cs ) )
945 {
949 bl1_toggle_uplo( uplo );
950 }
951
952
953 if ( bl1_is_upper( uplo ) )
954 {
955 for ( j = 0; j < n_iter; j++ )
956 {
957 n_elem = bl1_min( j + 1, n_elem_max );
958 a_begin = a + j*lda;
959 b_begin = b + j*ldb;
960
961 bl1_dzcopyv( BLIS1_NO_CONJUGATE,
962 n_elem,
963 a_begin, inca,
964 b_begin, incb );
965 }
966 }
967 else // if ( bl1_is_lower( uplo ) )
968 {
969 for ( j = 0; j < n_iter; j++ )
970 {
971 n_elem = bl1_max( 0, n_elem_max - j );
972 a_begin = a + j*lda + j*inca;
973 b_begin = b + j*ldb + j*incb;
974
975 if ( n_elem <= 0 ) break;
976
977 bl1_dzcopyv( BLIS1_NO_CONJUGATE,
978 n_elem,
979 a_begin, inca,
980 b_begin, incb );
981 }
982 }
983}
void bl1_dzcopyv(conj1_t conj, int m, double *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:259

References bl1_dzcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_dzcopymrt()

void bl1_dzcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
1179{
1180 double* a_begin;
1182 int lda, inca;
1183 int ldb, incb;
1184 int n_iter;
1185 int n_elem;
1186 int n_elem_max;
1188 int j;
1189 conj1_t conj;
1190
1191 // Return early if possible.
1192 if ( bl1_zero_dim2( m, n ) ) return;
1193
1194 // Initialize variables based on storage format of B and value of uplo.
1195 if ( bl1_is_col_storage( b_rs, b_cs ) )
1196 {
1197 if ( bl1_is_lower( uplo ) )
1198 {
1199 n_iter = bl1_min( m, n );
1200 n_elem_max = m;
1201 lda = a_cs;
1202 inca = a_rs;
1203 ldb = b_cs;
1204 incb = b_rs;
1206 }
1207 else // if ( bl1_is_upper( uplo ) )
1208 {
1209 n_iter = n;
1210 n_elem_max = bl1_min( m, n );
1211 lda = a_cs;
1212 inca = a_rs;
1213 ldb = b_cs;
1214 incb = b_rs;
1216 }
1217 }
1218 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1219 {
1220 if ( bl1_is_lower( uplo ) )
1221 {
1222 n_iter = m;
1223 n_elem_max = bl1_min( m, n );
1224 lda = a_rs;
1225 inca = a_cs;
1226 ldb = b_rs;
1227 incb = b_cs;
1229 }
1230 else // if ( bl1_is_upper( uplo ) )
1231 {
1232 n_iter = bl1_min( m, n );
1233 n_elem_max = n;
1234 lda = a_rs;
1235 inca = a_cs;
1236 ldb = b_rs;
1237 incb = b_cs;
1239 }
1240 }
1241
1242 // Swap lda and inca if we're doing a transpose.
1243 if ( bl1_does_trans( trans ) )
1244 {
1246 }
1247
1248 // Extract conj component from trans parameter.
1250
1251 // Choose the loop based on whether n_elem will be shrinking or growing
1252 // with each iteration.
1254 {
1255 for ( j = 0; j < n_iter; j++ )
1256 {
1257 n_elem = n_elem_max - j;
1258 a_begin = a + j*lda + j*inca;
1259 b_begin = b + j*ldb + j*incb;
1260
1262 n_elem,
1263 a_begin, inca,
1264 b_begin, incb );
1265 }
1266 }
1267 else // if ( n_elem_is_ascending )
1268 {
1269 for ( j = 0; j < n_iter; j++ )
1270 {
1271 n_elem = bl1_min( j + 1, n_elem_max );
1272 a_begin = a + j*lda;
1273 b_begin = b + j*ldb;
1274
1276 n_elem,
1277 a_begin, inca,
1278 b_begin, incb );
1279 }
1280 }
1281}

References bl1_does_trans(), bl1_dzcopyv(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_dzcopymt()

void bl1_dzcopymt ( trans1_t  trans,
int  m,
int  n,
double * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
1033{
1034 double* a_begin;
1036 int lda, inca;
1037 int ldb, incb;
1038 int n_iter;
1039 int n_elem;
1040 int j;
1041 conj1_t conj;
1042
1043 // Return early if possible.
1044 if ( bl1_zero_dim2( m, n ) ) return;
1045
1046 // Handle cases where A and B are vectors to ensure that the underlying copy
1047 // gets invoked only once.
1048 if ( bl1_is_vector( m, n ) )
1049 {
1050 // Initialize with values appropriate for vectors.
1051 n_iter = 1;
1052 n_elem = bl1_vector_dim( m, n );
1053 lda = 1; // multiplied by zero when n_iter == 1; not needed.
1054 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1055 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1057 }
1058 else // matrix case
1059 {
1060 // Initialize with optimal values for column-major storage of B.
1061 n_iter = n;
1062 n_elem = m;
1063 lda = a_cs;
1064 inca = a_rs;
1065 ldb = b_cs;
1066 incb = b_rs;
1067
1068 // Handle the transposition of A.
1069 if ( bl1_does_trans( trans ) )
1070 {
1072 }
1073
1074 // An optimization: if B is row-major, then let's access the matrix by rows
1075 // instead of by columns for increased spatial locality.
1076 if ( bl1_is_row_storage( b_rs, b_cs ) )
1077 {
1081 }
1082 }
1083
1084 // Extract conj component from trans parameter.
1086
1087 for ( j = 0; j < n_iter; ++j )
1088 {
1089 a_begin = a + j*lda;
1090 b_begin = b + j*ldb;
1091
1093 n_elem,
1094 a_begin, inca,
1095 b_begin, incb );
1096 }
1097}

References bl1_does_trans(), bl1_dzcopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_dzcopyv()

void bl1_dzcopyv ( conj1_t  conj,
int  m,
double * x,
int  incx,
dcomplex * y,
int  incy 
)
260{
261 double* chi;
262 dcomplex* psi;
263 int i;
264
265 // Return early if possible.
266 if ( bl1_zero_dim1( m ) ) return;
267
268 // Initialize pointers.
269 chi = x;
270 psi = y;
271
272 for ( i = 0; i < m; ++i )
273 {
274 psi->real = *chi;
275 psi->imag = 0.0;
276
277 chi += incx;
278 psi += incy;
279 }
280}

References bl1_zero_dim1(), i, dcomplex::imag, and dcomplex::real.

Referenced by bl1_dzcopymr(), bl1_dzcopymrt(), and bl1_dzcopymt().

◆ bl1_icopymt()

void bl1_icopymt ( trans1_t  trans,
int  m,
int  n,
int * a,
int  a_rs,
int  a_cs,
int * b,
int  b_rs,
int  b_cs 
)
14{
15 int* a_begin;
16 int* b_begin;
17 int lda, inca;
18 int ldb, incb;
19 int n_iter;
20 int n_elem;
21 int j;
22
23 // Return early if possible.
24 if ( bl1_zero_dim2( m, n ) ) return;
25
26 // Handle cases where A and B are vectors to ensure that the underlying copy
27 // gets invoked only once.
28 if ( bl1_is_vector( m, n ) )
29 {
30 // Initialize with values appropriate for vectors.
31 n_iter = 1;
32 n_elem = bl1_vector_dim( m, n );
33 lda = 1; // multiplied by zero when n_iter == 1; not needed.
34 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
37 }
38 else // matrix case
39 {
40 // Initialize with optimal values for column-major storage.
41 n_iter = n;
42 n_elem = m;
43 lda = a_cs;
44 inca = a_rs;
45 ldb = b_cs;
46 incb = b_rs;
47
48 // Handle the transposition of A.
49 if ( bl1_does_trans( trans ) )
50 {
52 }
53
54 // An optimization: if B is row-major and if A is effectively row-major
55 // after a possible transposition, then let's access the matrix by rows
56 // instead of by columns for increased spatial locality.
58 {
59 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
61 {
65 }
66 }
67 }
68
69 for ( j = 0; j < n_iter; j++ )
70 {
71 a_begin = a + j*lda;
72 b_begin = b + j*ldb;
73
75 n_elem,
76 a_begin, inca,
77 b_begin, incb );
78 }
79}
void bl1_icopyv(conj1_t conj, int m, int *x, int incx, int *y, int incy)
Definition bl1_copyv.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_icopyv(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_icopyv()

void bl1_icopyv ( conj1_t  conj,
int  m,
int * x,
int  incx,
int * y,
int  incy 
)
14{
15 int* chi;
16 int* psi;
17 int i;
18
19 // Return early if possible.
20 if ( bl1_zero_dim1( m ) ) return;
21
22 // Initialize pointers.
23 chi = x;
24 psi = y;
25
26 for ( i = 0; i < m; ++i )
27 {
28 *psi = *chi;
29
30 chi += incx;
31 psi += incy;
32 }
33}

References bl1_zero_dim1(), and i.

Referenced by bl1_icopymt().

◆ bl1_samax()

void bl1_samax ( int  n,
float * x,
int  incx,
int * index 
)
14{
15#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16 *index = cblas_isamax( n,
17 x, incx );
18#else
19 *index = F77_isamax( &n,
20 x, &incx ) - 1;
21#endif
22}
int F77_isamax(int *n, float *x, int *incx)
CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX)

References cblas_isamax(), and F77_isamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_LU_piv_ops_var5(), and FLA_SA_LU_unb().

◆ bl1_sasum()

void bl1_sasum ( int  n,
float * x,
int  incx,
float * norm 
)
14{
15#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16 *norm = cblas_sasum( n,
17 x, incx );
18#else
19 *norm = F77_sasum( &n,
20 x, &incx );
21#endif
22}
float F77_sasum(int *n, float *x, int *incx)
float cblas_sasum(const int N, const float *X, const int incX)

References cblas_sasum(), and F77_sasum().

Referenced by FLA_Asum_external().

◆ bl1_saxpy()

void bl1_saxpy ( int  n,
float * alpha,
float * x,
int  incx,
float * y,
int  incy 
)
14{
15#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16 cblas_saxpy( n,
17 *alpha,
18 x, incx,
19 y, incy );
20#else
21 F77_saxpy( &n,
22 alpha,
23 x, &incx,
24 y, &incy );
25#endif
26}
void F77_saxpy(int *n, float *alpha, float *x, int *incx, float *y, int *incy)
void cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY)

References cblas_saxpy(), and F77_saxpy().

Referenced by bl1_saxpymt(), bl1_saxpysmt(), bl1_saxpysv(), and bl1_saxpyv().

◆ bl1_saxpymrt()

void bl1_saxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 int lda, inca;
18 int ldb, incb;
19 int n_iter;
20 int n_elem;
21 int n_elem_max;
23 int j;
25
26 // Return early if possible.
27 if ( bl1_zero_dim2( m, n ) ) return;
28
29 // Initialize variables based on storage format of B and value of uplo.
31 {
32 if ( bl1_is_lower( uplo ) )
33 {
34 n_iter = bl1_min( m, n );
35 n_elem_max = m;
36 lda = a_cs;
37 inca = a_rs;
38 ldb = b_cs;
39 incb = b_rs;
41 }
42 else // if ( bl1_is_upper( uplo ) )
43 {
44 n_iter = n;
45 n_elem_max = bl1_min( m, n );
46 lda = a_cs;
47 inca = a_rs;
48 ldb = b_cs;
49 incb = b_rs;
51 }
52 }
53 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
54 {
55 if ( bl1_is_lower( uplo ) )
56 {
57 n_iter = m;
58 n_elem_max = bl1_min( m, n );
59 lda = a_rs;
60 inca = a_cs;
61 ldb = b_rs;
62 incb = b_cs;
64 }
65 else // if ( bl1_is_upper( uplo ) )
66 {
67 n_iter = bl1_min( m, n );
68 n_elem_max = n;
69 lda = a_rs;
70 inca = a_cs;
71 ldb = b_rs;
72 incb = b_cs;
74 }
75 }
76
77 // Swap lda and inca if we're doing a transpose.
78 if ( bl1_does_trans( trans ) )
79 {
81 }
82
83 // Extract conj component from trans parameter.
85
86 // Choose the loop based on whether n_elem will be shrinking or growing
87 // with each iteration.
89 {
90 for ( j = 0; j < n_iter; j++ )
91 {
93 a_begin = a + j*lda + j*inca;
94 b_begin = b + j*ldb + j*incb;
95
97 n_elem,
98 alpha,
100 b_begin, incb );
101 }
102 }
103 else // if ( n_elem_is_ascending )
104 {
105 for ( j = 0; j < n_iter; j++ )
106 {
107 n_elem = bl1_min( j + 1, n_elem_max );
108 a_begin = a + j*lda;
109 b_begin = b + j*ldb;
110
112 n_elem,
113 alpha,
114 a_begin, inca,
115 b_begin, incb );
116 }
117 }
118}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_saxpyv(), and bl1_zero_dim2().

Referenced by FLA_Axpyrt_external().

◆ bl1_saxpymt()

void bl1_saxpymt ( trans1_t  trans,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 int lda, inca;
18 int ldb, incb;
19 int n_iter;
20 int n_elem;
21 int j;
22
23 // Return early if possible.
24 if ( bl1_zero_dim2( m, n ) ) return;
25
26 // Handle cases where A and B are vectors to ensure that the underlying axpy
27 // gets invoked only once.
28 if ( bl1_is_vector( m, n ) )
29 {
30 // Initialize with values appropriate for vectors.
31 n_iter = 1;
32 n_elem = bl1_vector_dim( m, n );
33 lda = 1; // multiplied by zero when n_iter == 1; not needed.
34 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
37 }
38 else // matrix case
39 {
40 // Initialize with optimal values for column-major storage.
41 n_iter = n;
42 n_elem = m;
43 lda = a_cs;
44 inca = a_rs;
45 ldb = b_cs;
46 incb = b_rs;
47
48 // Handle the transposition of A.
49 if ( bl1_does_trans( trans ) )
50 {
52 }
53
54 // An optimization: if B is row-major and if A is effectively row-major
55 // after a possible transposition, then let's access the matrices by rows
56 // instead of by columns for increased spatial locality.
58 {
59 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
61 {
65 }
66 }
67 }
68
69 for ( j = 0; j < n_iter; j++ )
70 {
71 a_begin = a + j*lda;
72 b_begin = b + j*ldb;
73
74 bl1_saxpy( n_elem,
75 alpha,
76 a_begin, inca,
77 b_begin, incb );
78 }
79}
void bl1_saxpy(int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpy.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_saxpysmt()

void bl1_saxpysmt ( trans1_t  trans,
int  m,
int  n,
float * alpha0,
float * alpha1,
float * a,
int  a_rs,
int  a_cs,
float * beta,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 float alpha_prod;
18 int lda, inca;
19 int ldb, incb;
20 int n_iter;
21 int n_elem;
22 int j;
23
24 // Return early if possible.
25 if ( bl1_zero_dim2( m, n ) ) return;
26
27 alpha_prod = (*alpha0) * (*alpha1);
28
29 // Handle cases where A and B are vectors to ensure that the underlying axpy
30 // gets invoked only once.
31 if ( bl1_is_vector( m, n ) )
32 {
33 // Initialize with values appropriate for vectors.
34 n_iter = 1;
35 n_elem = bl1_vector_dim( m, n );
36 lda = 1; // multiplied by zero when n_iter == 1; not needed.
37 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
38 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
40 }
41 else // matrix case
42 {
43 // Initialize with optimal values for column-major storage.
44 n_iter = n;
45 n_elem = m;
46 lda = a_cs;
47 inca = a_rs;
48 ldb = b_cs;
49 incb = b_rs;
50
51 // Handle the transposition of A.
52 if ( bl1_does_trans( trans ) )
53 {
55 }
56
57 // An optimization: if B is row-major and if A is effectively row-major
58 // after a possible transposition, then let's access the matrices by rows
59 // instead of by columns for increased spatial locality.
61 {
62 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
64 {
68 }
69 }
70 }
71
72 for ( j = 0; j < n_iter; j++ )
73 {
74 a_begin = a + j*lda;
75 b_begin = b + j*ldb;
76
77 bl1_sscal( n_elem,
78 beta,
79 b_begin, incb );
80
81 bl1_saxpy( n_elem,
82 &alpha_prod,
83 a_begin, inca,
84 b_begin, incb );
85 }
86}

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_saxpysv()

void bl1_saxpysv ( int  n,
float * alpha0,
float * alpha1,
float * x,
int  incx,
float * beta,
float * y,
int  incy 
)
14{
15 float alpha_prod;
16
17 // Return early if possible.
18 if ( bl1_zero_dim1( n ) ) return;
19
20 alpha_prod = (*alpha0) * (*alpha1);
21
22 bl1_sscal( n,
23 beta,
24 y, incy );
25
26 bl1_saxpy( n,
27 &alpha_prod,
28 x, incx,
29 y, incy );
30}

References bl1_saxpy(), bl1_sscal(), and bl1_zero_dim1().

Referenced by FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

◆ bl1_saxpyv()

void bl1_saxpyv ( conj1_t  conj,
int  n,
float * alpha,
float * x,
int  incx,
float * y,
int  incy 
)
14{
15 bl1_saxpy( n,
16 alpha,
17 x, incx,
18 y, incy );
19}

References bl1_saxpy().

Referenced by bl1_saxpymrt(), bl1_strmvsx(), bl1_strsvsx(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_il_ops_var4(), FLA_Eig_gest_il_ops_var5(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_iu_ops_var4(), FLA_Eig_gest_iu_ops_var5(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nl_ops_var4(), FLA_Eig_gest_nl_ops_var5(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Eig_gest_nu_ops_var4(), FLA_Eig_gest_nu_ops_var5(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Fused_Uhu_Yhu_Zhu_ops_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), and FLA_Tridiag_UT_l_step_ops_var3().

◆ bl1_sccopymr()

void bl1_sccopymr ( uplo1_t  uplo,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
465{
466 float* a_begin;
468 int lda, inca;
469 int ldb, incb;
470 int n_iter;
471 int n_elem_max;
472 int n_elem;
473 int j;
474
475 // Return early if possible.
476 if ( bl1_zero_dim2( m, n ) ) return;
477
478 // We initialize for column-major.
479 n_iter = n;
480 n_elem_max = m;
481 lda = a_cs;
482 inca = a_rs;
483 ldb = b_cs;
484 incb = b_rs;
485
486 // An optimization: if B is row-major, then let's access the matrix
487 // by rows instead of by columns for increased spatial locality.
488 if ( bl1_is_row_storage( b_rs, b_cs ) )
489 {
493 bl1_toggle_uplo( uplo );
494 }
495
496
497 if ( bl1_is_upper( uplo ) )
498 {
499 for ( j = 0; j < n_iter; j++ )
500 {
501 n_elem = bl1_min( j + 1, n_elem_max );
502 a_begin = a + j*lda;
503 b_begin = b + j*ldb;
504
505 bl1_sccopyv( BLIS1_NO_CONJUGATE,
506 n_elem,
507 a_begin, inca,
508 b_begin, incb );
509 }
510 }
511 else // if ( bl1_is_lower( uplo ) )
512 {
513 for ( j = 0; j < n_iter; j++ )
514 {
515 n_elem = bl1_max( 0, n_elem_max - j );
516 a_begin = a + j*lda + j*inca;
517 b_begin = b + j*ldb + j*incb;
518
519 if ( n_elem <= 0 ) break;
520
521 bl1_sccopyv( BLIS1_NO_CONJUGATE,
522 n_elem,
523 a_begin, inca,
524 b_begin, incb );
525 }
526 }
527}
void bl1_sccopyv(conj1_t conj, int m, float *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:124

References bl1_is_row_storage(), bl1_is_upper(), bl1_sccopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_sccopymrt()

void bl1_sccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
649{
650 float* a_begin;
652 int lda, inca;
653 int ldb, incb;
654 int n_iter;
655 int n_elem;
656 int n_elem_max;
658 int j;
660
661 // Return early if possible.
662 if ( bl1_zero_dim2( m, n ) ) return;
663
664 // Initialize variables based on storage format of B and value of uplo.
665 if ( bl1_is_col_storage( b_rs, b_cs ) )
666 {
667 if ( bl1_is_lower( uplo ) )
668 {
669 n_iter = bl1_min( m, n );
670 n_elem_max = m;
671 lda = a_cs;
672 inca = a_rs;
673 ldb = b_cs;
674 incb = b_rs;
676 }
677 else // if ( bl1_is_upper( uplo ) )
678 {
679 n_iter = n;
680 n_elem_max = bl1_min( m, n );
681 lda = a_cs;
682 inca = a_rs;
683 ldb = b_cs;
684 incb = b_rs;
686 }
687 }
688 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
689 {
690 if ( bl1_is_lower( uplo ) )
691 {
692 n_iter = m;
693 n_elem_max = bl1_min( m, n );
694 lda = a_rs;
695 inca = a_cs;
696 ldb = b_rs;
697 incb = b_cs;
699 }
700 else // if ( bl1_is_upper( uplo ) )
701 {
702 n_iter = bl1_min( m, n );
703 n_elem_max = n;
704 lda = a_rs;
705 inca = a_cs;
706 ldb = b_rs;
707 incb = b_cs;
709 }
710 }
711
712 // Swap lda and inca if we're doing a transpose.
713 if ( bl1_does_trans( trans ) )
714 {
716 }
717
718 // Extract conj component from trans parameter.
720
721 // Choose the loop based on whether n_elem will be shrinking or growing
722 // with each iteration.
724 {
725 for ( j = 0; j < n_iter; j++ )
726 {
727 n_elem = n_elem_max - j;
728 a_begin = a + j*lda + j*inca;
729 b_begin = b + j*ldb + j*incb;
730
732 n_elem,
733 a_begin, inca,
734 b_begin, incb );
735 }
736 }
737 else // if ( n_elem_is_ascending )
738 {
739 for ( j = 0; j < n_iter; j++ )
740 {
741 n_elem = bl1_min( j + 1, n_elem_max );
742 a_begin = a + j*lda;
743 b_begin = b + j*ldb;
744
746 n_elem,
747 a_begin, inca,
748 b_begin, incb );
749 }
750 }
751}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_sccopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_sccopymt()

void bl1_sccopymt ( trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
563{
564 float* a_begin;
566 int lda, inca;
567 int ldb, incb;
568 int n_iter;
569 int n_elem;
570 int j;
572
573 // Return early if possible.
574 if ( bl1_zero_dim2( m, n ) ) return;
575
576 // Handle cases where A and B are vectors to ensure that the underlying copy
577 // gets invoked only once.
578 if ( bl1_is_vector( m, n ) )
579 {
580 // Initialize with values appropriate for vectors.
581 n_iter = 1;
582 n_elem = bl1_vector_dim( m, n );
583 lda = 1; // multiplied by zero when n_iter == 1; not needed.
584 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
585 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
587 }
588 else // matrix case
589 {
590 // Initialize with optimal values for column-major storage of B.
591 n_iter = n;
592 n_elem = m;
593 lda = a_cs;
594 inca = a_rs;
595 ldb = b_cs;
596 incb = b_rs;
597
598 // Handle the transposition of A.
599 if ( bl1_does_trans( trans ) )
600 {
602 }
603
604 // An optimization: if B is row-major, then let's access the matrix by rows
605 // instead of by columns for increased spatial locality.
606 if ( bl1_is_row_storage( b_rs, b_cs ) )
607 {
611 }
612 }
613
614 // Extract conj component from trans parameter.
616
617 for ( j = 0; j < n_iter; ++j )
618 {
619 a_begin = a + j*lda;
620 b_begin = b + j*ldb;
621
623 n_elem,
624 a_begin, inca,
625 b_begin, incb );
626 }
627}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sccopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_sccopyv()

void bl1_sccopyv ( conj1_t  conj,
int  m,
float * x,
int  incx,
scomplex * y,
int  incy 
)
125{
126 float* chi;
127 scomplex* psi;
128 int i;
129
130 // Return early if possible.
131 if ( bl1_zero_dim1( m ) ) return;
132
133 // Initialize pointers.
134 chi = x;
135 psi = y;
136
137 for ( i = 0; i < m; ++i )
138 {
139 psi->real = *chi;
140 psi->imag = 0.0F;
141
142 chi += incx;
143 psi += incy;
144 }
145}

References bl1_zero_dim1(), i, scomplex::imag, and scomplex::real.

Referenced by bl1_sccopymr(), bl1_sccopymrt(), and bl1_sccopymt().

◆ bl1_sconjm()

void bl1_sconjm ( int  m,
int  n,
float * a,
int  a_rs,
int  a_cs 
)
14{
15 return;
16}

◆ bl1_sconjmr()

void bl1_sconjmr ( uplo1_t  uplo,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs 
)
14{
15 return;
16}

◆ bl1_sconjv()

void bl1_sconjv ( int  m,
float * x,
int  incx 
)

◆ bl1_scopy()

void bl1_scopy ( int  m,
float * x,
int  incx,
float * y,
int  incy 
)
14{
15#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16 cblas_scopy( m,
17 x, incx,
18 y, incy );
19#else
20 F77_scopy( &m,
21 x, &incx,
22 y, &incy );
23#endif
24}
void F77_scopy(int *n, float *x, int *incx, float *y, int *incy)
void cblas_scopy(const int N, const float *X, const int incX, float *Y, const int incY)

References cblas_scopy(), and F77_scopy().

Referenced by bl1_scopymr(), bl1_scopymt(), bl1_scopyv(), FLA_Obj_extract_imag_part(), FLA_Obj_extract_real_part(), and FLA_SA_LU_unb().

◆ bl1_scopymr()

void bl1_scopymr ( uplo1_t  uplo,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 int lda, inca;
18 int ldb, incb;
19 int n_iter;
20 int n_elem_max;
21 int n_elem;
22 int j;
23
24 // Return early if possible.
25 if ( bl1_zero_dim2( m, n ) ) return;
26
27 // We initialize for column-major.
28 n_iter = n;
29 n_elem_max = m;
30 lda = a_cs;
31 inca = a_rs;
32 ldb = b_cs;
33 incb = b_rs;
34
35 // An optimization: if A and B are both row-major, then let's access the
36 // matrices by rows instead of by columns for increased spatial locality.
38 {
42 bl1_toggle_uplo( uplo );
43 }
44
45
46 if ( bl1_is_upper( uplo ) )
47 {
48 for ( j = 0; j < n_iter; j++ )
49 {
50 n_elem = bl1_min( j + 1, n_elem_max );
51 a_begin = a + j*lda;
52 b_begin = b + j*ldb;
53
54 bl1_scopy( n_elem,
55 a_begin, inca,
56 b_begin, incb );
57 }
58 }
59 else // if ( bl1_is_lower( uplo ) )
60 {
61 for ( j = 0; j < n_iter; j++ )
62 {
63 n_elem = bl1_max( 0, n_elem_max - j );
64 a_begin = a + j*lda + j*inca;
65 b_begin = b + j*ldb + j*incb;
66
67 if ( n_elem <= 0 ) break;
68
69 bl1_scopy( n_elem,
70 a_begin, inca,
71 b_begin, incb );
72 }
73 }
74}
void bl1_scopy(int m, float *x, int incx, float *y, int incy)
Definition bl1_copy.c:13

References bl1_is_row_storage(), bl1_is_upper(), bl1_scopy(), and bl1_zero_dim2().

Referenced by bl1_screate_contigmr(), bl1_sfree_saved_contigmr(), and FLA_Copyr_external().

◆ bl1_scopymrt()

void bl1_scopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 int lda, inca;
18 int ldb, incb;
19 int n_iter;
20 int n_elem;
21 int n_elem_max;
23 int j;
25
26 // Return early if possible.
27 if ( bl1_zero_dim2( m, n ) ) return;
28
29 // Initialize variables based on storage format of B and value of uplo.
31 {
32 if ( bl1_is_lower( uplo ) )
33 {
34 n_iter = bl1_min( m, n );
35 n_elem_max = m;
36 lda = a_cs;
37 inca = a_rs;
38 ldb = b_cs;
39 incb = b_rs;
41 }
42 else // if ( bl1_is_upper( uplo ) )
43 {
44 n_iter = n;
45 n_elem_max = bl1_min( m, n );
46 lda = a_cs;
47 inca = a_rs;
48 ldb = b_cs;
49 incb = b_rs;
51 }
52 }
53 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
54 {
55 if ( bl1_is_lower( uplo ) )
56 {
57 n_iter = m;
58 n_elem_max = bl1_min( m, n );
59 lda = a_rs;
60 inca = a_cs;
61 ldb = b_rs;
62 incb = b_cs;
64 }
65 else // if ( bl1_is_upper( uplo ) )
66 {
67 n_iter = bl1_min( m, n );
68 n_elem_max = n;
69 lda = a_rs;
70 inca = a_cs;
71 ldb = b_rs;
72 incb = b_cs;
74 }
75 }
76
77 // Swap lda and inca if we're doing a transpose.
78 if ( bl1_does_trans( trans ) )
79 {
81 }
82
83 // Extract conj component from trans parameter.
85
86 // Choose the loop based on whether n_elem will be shrinking or growing
87 // with each iteration.
89 {
90 for ( j = 0; j < n_iter; j++ )
91 {
93 a_begin = a + j*lda + j*inca;
94 b_begin = b + j*ldb + j*incb;
95
97 n_elem,
99 b_begin, incb );
100 }
101 }
102 else // if ( n_elem_is_ascending )
103 {
104 for ( j = 0; j < n_iter; j++ )
105 {
106 n_elem = bl1_min( j + 1, n_elem_max );
107 a_begin = a + j*lda;
108 b_begin = b + j*ldb;
109
111 n_elem,
112 a_begin, inca,
113 b_begin, incb );
114 }
115 }
116}
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_scopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

◆ bl1_scopymt()

void bl1_scopymt ( trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
82{
83 float* a_begin;
84 float* b_begin;
85 int lda, inca;
86 int ldb, incb;
87 int n_iter;
88 int n_elem;
89 int j;
90
91 // Return early if possible.
92 if ( bl1_zero_dim2( m, n ) ) return;
93
94 // Handle cases where A and B are vectors to ensure that the underlying copy
95 // gets invoked only once.
96 if ( bl1_is_vector( m, n ) )
97 {
98 // Initialize with values appropriate for vectors.
99 n_iter = 1;
100 n_elem = bl1_vector_dim( m, n );
101 lda = 1; // multiplied by zero when n_iter == 1; not needed.
102 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
103 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
105 }
106 else // matrix case
107 {
108 // Initialize with optimal values for column-major storage.
109 n_iter = n;
110 n_elem = m;
111 lda = a_cs;
112 inca = a_rs;
113 ldb = b_cs;
114 incb = b_rs;
115
116 // Handle the transposition of A.
117 if ( bl1_does_trans( trans ) )
118 {
120 }
121
122 // An optimization: if B is row-major and if A is effectively row-major
123 // after a possible transposition, then let's access the matrix by rows
124 // instead of by columns for increased spatial locality.
125 if ( bl1_is_row_storage( b_rs, b_cs ) )
126 {
127 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
129 {
133 }
134 }
135 }
136
137 for ( j = 0; j < n_iter; j++ )
138 {
139 a_begin = a + j*lda;
140 b_begin = b + j*ldb;
141
142 bl1_scopy( n_elem,
143 a_begin, inca,
144 b_begin, incb );
145 }
146}

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_scopy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree_saved_contigm(), bl1_sfree_saved_contigmsr(), bl1_ssymm(), bl1_ssyr2k(), bl1_strmmsx(), bl1_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_scopyv()

void bl1_scopyv ( conj1_t  conj,
int  m,
float * x,
int  incx,
float * y,
int  incy 
)

◆ bl1_sdcopymr()

void bl1_sdcopymr ( uplo1_t  uplo,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
335{
336 float* a_begin;
337 double* b_begin;
338 int lda, inca;
339 int ldb, incb;
340 int n_iter;
341 int n_elem_max;
342 int n_elem;
343 int j;
344
345 // Return early if possible.
346 if ( bl1_zero_dim2( m, n ) ) return;
347
348 // We initialize for column-major.
349 n_iter = n;
350 n_elem_max = m;
351 lda = a_cs;
352 inca = a_rs;
353 ldb = b_cs;
354 incb = b_rs;
355
356 // An optimization: if B is row-major, then let's access the matrix
357 // by rows instead of by columns for increased spatial locality.
358 if ( bl1_is_row_storage( b_rs, b_cs ) )
359 {
363 bl1_toggle_uplo( uplo );
364 }
365
366
367 if ( bl1_is_upper( uplo ) )
368 {
369 for ( j = 0; j < n_iter; j++ )
370 {
371 n_elem = bl1_min( j + 1, n_elem_max );
372 a_begin = a + j*lda;
373 b_begin = b + j*ldb;
374
375 bl1_sdcopyv( BLIS1_NO_CONJUGATE,
376 n_elem,
377 a_begin, inca,
378 b_begin, incb );
379 }
380 }
381 else // if ( bl1_is_lower( uplo ) )
382 {
383 for ( j = 0; j < n_iter; j++ )
384 {
385 n_elem = bl1_max( 0, n_elem_max - j );
386 a_begin = a + j*lda + j*inca;
387 b_begin = b + j*ldb + j*incb;
388
389 if ( n_elem <= 0 ) break;
390
391 bl1_sdcopyv( BLIS1_NO_CONJUGATE,
392 n_elem,
393 a_begin, inca,
394 b_begin, incb );
395 }
396 }
397}
void bl1_sdcopyv(conj1_t conj, int m, float *x, int incx, double *y, int incy)
Definition bl1_copyv.c:80

References bl1_is_row_storage(), bl1_is_upper(), bl1_sdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_sdcopymrt()

void bl1_sdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
543{
544 float* a_begin;
545 double* b_begin;
546 int lda, inca;
547 int ldb, incb;
548 int n_iter;
549 int n_elem;
550 int n_elem_max;
552 int j;
554
555 // Return early if possible.
556 if ( bl1_zero_dim2( m, n ) ) return;
557
558 // Initialize variables based on storage format of B and value of uplo.
559 if ( bl1_is_col_storage( b_rs, b_cs ) )
560 {
561 if ( bl1_is_lower( uplo ) )
562 {
563 n_iter = bl1_min( m, n );
564 n_elem_max = m;
565 lda = a_cs;
566 inca = a_rs;
567 ldb = b_cs;
568 incb = b_rs;
570 }
571 else // if ( bl1_is_upper( uplo ) )
572 {
573 n_iter = n;
574 n_elem_max = bl1_min( m, n );
575 lda = a_cs;
576 inca = a_rs;
577 ldb = b_cs;
578 incb = b_rs;
580 }
581 }
582 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
583 {
584 if ( bl1_is_lower( uplo ) )
585 {
586 n_iter = m;
587 n_elem_max = bl1_min( m, n );
588 lda = a_rs;
589 inca = a_cs;
590 ldb = b_rs;
591 incb = b_cs;
593 }
594 else // if ( bl1_is_upper( uplo ) )
595 {
596 n_iter = bl1_min( m, n );
597 n_elem_max = n;
598 lda = a_rs;
599 inca = a_cs;
600 ldb = b_rs;
601 incb = b_cs;
603 }
604 }
605
606 // Swap lda and inca if we're doing a transpose.
607 if ( bl1_does_trans( trans ) )
608 {
610 }
611
612 // Extract conj component from trans parameter.
614
615 // Choose the loop based on whether n_elem will be shrinking or growing
616 // with each iteration.
618 {
619 for ( j = 0; j < n_iter; j++ )
620 {
621 n_elem = n_elem_max - j;
622 a_begin = a + j*lda + j*inca;
623 b_begin = b + j*ldb + j*incb;
624
626 n_elem,
627 a_begin, inca,
628 b_begin, incb );
629 }
630 }
631 else // if ( n_elem_is_ascending )
632 {
633 for ( j = 0; j < n_iter; j++ )
634 {
635 n_elem = bl1_min( j + 1, n_elem_max );
636 a_begin = a + j*lda;
637 b_begin = b + j*ldb;
638
640 n_elem,
641 a_begin, inca,
642 b_begin, incb );
643 }
644 }
645}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_sdcopymt()

void bl1_sdcopymt ( trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
429{
430 float* a_begin;
431 double* b_begin;
432 int lda, inca;
433 int ldb, incb;
434 int n_iter;
435 int n_elem;
436 int j;
438
439 // Return early if possible.
440 if ( bl1_zero_dim2( m, n ) ) return;
441
442 // Handle cases where A and B are vectors to ensure that the underlying copy
443 // gets invoked only once.
444 if ( bl1_is_vector( m, n ) )
445 {
446 // Initialize with values appropriate for vectors.
447 n_iter = 1;
448 n_elem = bl1_vector_dim( m, n );
449 lda = 1; // multiplied by zero when n_iter == 1; not needed.
450 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
451 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
453 }
454 else // matrix case
455 {
456 // Initialize with optimal values for column-major storage of B.
457 n_iter = n;
458 n_elem = m;
459 lda = a_cs;
460 inca = a_rs;
461 ldb = b_cs;
462 incb = b_rs;
463
464 // Handle the transposition of A.
465 if ( bl1_does_trans( trans ) )
466 {
468 }
469
470 // An optimization: if B is row-major, then let's access the matrix by rows
471 // instead of by columns for increased spatial locality.
472 if ( bl1_is_row_storage( b_rs, b_cs ) )
473 {
477 }
478 }
479
480 // Extract conj component from trans parameter.
482
483 for ( j = 0; j < n_iter; ++j )
484 {
485 a_begin = a + j*lda;
486 b_begin = b + j*ldb;
487
489 n_elem,
490 a_begin, inca,
491 b_begin, incb );
492 }
493}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_sdcopyv()

void bl1_sdcopyv ( conj1_t  conj,
int  m,
float * x,
int  incx,
double * y,
int  incy 
)
81{
82 float* chi;
83 double* psi;
84 int i;
85
86 // Return early if possible.
87 if ( bl1_zero_dim1( m ) ) return;
88
89 // Initialize pointers.
90 chi = x;
91 psi = y;
92
93 for ( i = 0; i < m; ++i )
94 {
95 *psi = *chi;
96
97 chi += incx;
98 psi += incy;
99 }
100}

References bl1_zero_dim1(), and i.

Referenced by bl1_sdcopymr(), bl1_sdcopymrt(), and bl1_sdcopymt().

◆ bl1_sdot()

void bl1_sdot ( conj1_t  conj,
int  n,
float * x,
int  incx,
float * y,
int  incy,
float * rho 
)

◆ bl1_sdot2s()

void bl1_sdot2s ( conj1_t  conj,
int  n,
float * alpha,
float * x,
int  incx,
float * y,
int  incy,
float * beta,
float * rho 
)

◆ bl1_sdots()

void bl1_sdots ( conj1_t  conj,
int  n,
float * alpha,
float * x,
int  incx,
float * y,
int  incy,
float * beta,
float * rho 
)

◆ bl1_sfnorm()

void bl1_sfnorm ( int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * norm 
)
14{
15 float* a_ij;
16 float sum;
17 int lda, inca;
18 int n_iter;
19 int n_elem;
20 int i, j;
21
22 // Return early if possible.
23 if ( bl1_zero_dim2( m, n ) ) return;
24
25 // Handle cases where A is a vector separately.
26 if ( bl1_is_vector( m, n ) )
27 {
28 // Initialize with values appropriate for vectors.
29 n_iter = 1;
30 n_elem = bl1_vector_dim( m, n );
31 lda = 1; // multiplied by zero when n_iter == 1; not needed.
33 }
34 else // matrix case
35 {
36 // Initialize with optimal values for column-major storage.
37 n_iter = n;
38 n_elem = m;
39 lda = a_cs;
40 inca = a_rs;
41
42 // An optimization: if A is row-major, then let's access the matrix by
43 // rows instead of by columns for increased spatial locality.
45 {
48 }
49 }
50
51 // Initialize the accumulator variable.
52 sum = 0.0F;
53
54 for ( j = 0; j < n_iter; j++ )
55 {
56 for ( i = 0; i < n_elem; i++ )
57 {
58 a_ij = a + i*inca + j*lda;
59 sum += (*a_ij) * (*a_ij);
60 }
61 }
62
63 // Compute the norm and store the result.
64 *norm = ( float ) sqrt( sum );
65}

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, and i.

Referenced by FLA_Norm_frob().

◆ bl1_sinvscalm()

void bl1_sinvscalm ( conj1_t  conj,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs 
)
14{
15 float alpha_inv;
16 float* a_begin;
17 int lda, inca;
18 int n_iter;
19 int n_elem;
20 int j;
21
22 // Return early if possible.
23 if ( bl1_zero_dim2( m, n ) ) return;
24 if ( bl1_seq1( alpha ) ) return;
25
26 // Handle cases where A is a vector to ensure that the underlying scal
27 // gets invoked only once.
28 if ( bl1_is_vector( m, n ) )
29 {
30 // Initialize with values appropriate for a vector.
31 n_iter = 1;
32 n_elem = bl1_vector_dim( m, n );
33 lda = 1; // multiplied by zero when n_iter == 1; not needed.
35 }
36 else // matrix case
37 {
38 // Initialize with optimal values for column-major storage.
39 n_iter = n;
40 n_elem = m;
41 lda = a_cs;
42 inca = a_rs;
43
44 // An optimization: if A is row-major, then let's access the matrix
45 // by rows instead of by columns to increase spatial locality.
47 {
50 }
51 }
52
54
55 for ( j = 0; j < n_iter; j++ )
56 {
57 a_begin = a + j*lda;
58
60 &alpha_inv,
61 a_begin, inca );
62 }
63}

References bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_sinvscalv()

void bl1_sinvscalv ( conj1_t  conj,
int  n,
float * alpha,
float * x,
int  incx 
)

◆ bl1_snrm2()

void bl1_snrm2 ( int  n,
float * x,
int  incx,
float * norm 
)
14{
15#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16 *norm = cblas_snrm2( n,
17 x, incx );
18#else
19 *norm = F77_snrm2( &n,
20 x, &incx );
21#endif
22}
float F77_snrm2(int *n, float *x, int *incx)
float cblas_snrm2(const int N, const float *X, const int incX)

References cblas_snrm2(), and F77_snrm2().

Referenced by FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_l_ops(), FLA_Househ3UD_UT_ops(), and FLA_Nrm2_external().

◆ bl1_sscal()

void bl1_sscal ( int  n,
float * alpha,
float * x,
int  incx 
)
14{
15#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16 cblas_sscal( n,
17 *alpha,
18 x, incx );
19#else
20 F77_sscal( &n,
21 alpha,
22 x, &incx );
23#endif
24}
void F77_sscal(int *n, float *alpha, float *y, int *incy)
void cblas_sscal(const int N, const float alpha, float *X, const int incX)

References cblas_sscal(), and F77_sscal().

Referenced by bl1_cconjm(), bl1_cconjmr(), bl1_cconjv(), bl1_saxpysmt(), bl1_saxpysv(), bl1_sinvscalm(), bl1_sinvscalv(), bl1_sscalm(), bl1_sscalmr(), bl1_sscalv(), and FLA_SA_LU_unb().

◆ bl1_sscalm()

void bl1_sscalm ( conj1_t  conj,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs 
)
14{
15 float alpha_conj;
16 float* a_begin;
17 int lda, inca;
18 int n_iter;
19 int n_elem;
20 int j;
21
22 // Return early if possible.
23 if ( bl1_zero_dim2( m, n ) ) return;
24 if ( bl1_seq1( alpha ) ) return;
25
26 // Handle cases where A is a vector to ensure that the underlying scal
27 // gets invoked only once.
28 if ( bl1_is_vector( m, n ) )
29 {
30 // Initialize with values appropriate for a vector.
31 n_iter = 1;
32 n_elem = bl1_vector_dim( m, n );
33 lda = 1; // multiplied by zero when n_iter == 1; not needed.
35 }
36 else // matrix case
37 {
38 // Initialize with optimal values for column-major storage.
39 n_iter = n;
40 n_elem = m;
41 lda = a_cs;
42 inca = a_rs;
43
44 // An optimization: if A is row-major, then let's access the matrix
45 // by rows instead of by columns to increase spatial locality.
47 {
50 }
51 }
52
54
55 for ( j = 0; j < n_iter; j++ )
56 {
57 a_begin = a + j*lda;
58
61 a_begin, inca );
62 }
63}

References bl1_is_row_storage(), bl1_is_vector(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_Lyap_n_ops_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_sscalmr()

void bl1_sscalmr ( uplo1_t  uplo,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs 
)
14{
15 float* a_begin;
16 int lda, inca;
17 int n_iter;
18 int n_elem_max;
19 int n_elem;
20 int j;
21
22 // Return early if possible.
23 if ( bl1_zero_dim2( m, n ) ) return;
24 if ( bl1_seq1( alpha ) ) return;
25
26 // We initialize for column-major.
27 n_iter = n;
28 n_elem_max = m;
29 lda = a_cs;
30 inca = a_rs;
31
32 // An optimization: if A is row-major, then let's access the matrix
33 // by rows instead of by columns to increase spatial locality.
35 {
38 bl1_toggle_uplo( uplo );
39 }
40
41 if ( bl1_is_upper( uplo ) )
42 {
43 for ( j = 0; j < n_iter; j++ )
44 {
45 n_elem = bl1_min( j + 1, n_elem_max );
46 a_begin = a + j*lda;
47
49 alpha,
50 a_begin, inca );
51 }
52 }
53 else // if ( bl1_is_lower( uplo ) )
54 {
55 for ( j = 0; j < n_iter; j++ )
56 {
57 n_elem = bl1_max( 0, n_elem_max - j );
58 a_begin = a + j*lda + j*inca;
59
60 if ( n_elem <= 0 ) break;
61
63 alpha,
64 a_begin, inca );
65 }
66 }
67}

References bl1_is_row_storage(), bl1_is_upper(), bl1_sscal(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

◆ bl1_sscalv()

void bl1_sscalv ( conj1_t  conj,
int  n,
float * alpha,
float * x,
int  incx 
)

◆ bl1_sscopymr()

void bl1_sscopymr ( uplo1_t  uplo,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
269{
270 float* a_begin;
271 float* b_begin;
272 int lda, inca;
273 int ldb, incb;
274 int n_iter;
275 int n_elem_max;
276 int n_elem;
277 int j;
278
279 // Return early if possible.
280 if ( bl1_zero_dim2( m, n ) ) return;
281
282 // We initialize for column-major.
283 n_iter = n;
284 n_elem_max = m;
285 lda = a_cs;
286 inca = a_rs;
287 ldb = b_cs;
288 incb = b_rs;
289
290 // An optimization: if B is row-major, then let's access the matrix
291 // by rows instead of by columns for increased spatial locality.
292 if ( bl1_is_row_storage( b_rs, b_cs ) )
293 {
297 bl1_toggle_uplo( uplo );
298 }
299
300
301 if ( bl1_is_upper( uplo ) )
302 {
303 for ( j = 0; j < n_iter; j++ )
304 {
305 n_elem = bl1_min( j + 1, n_elem_max );
306 a_begin = a + j*lda;
307 b_begin = b + j*ldb;
308
310 n_elem,
311 a_begin, inca,
312 b_begin, incb );
313 }
314 }
315 else // if ( bl1_is_lower( uplo ) )
316 {
317 for ( j = 0; j < n_iter; j++ )
318 {
319 n_elem = bl1_max( 0, n_elem_max - j );
320 a_begin = a + j*lda + j*inca;
321 b_begin = b + j*ldb + j*incb;
322
323 if ( n_elem <= 0 ) break;
324
326 n_elem,
327 a_begin, inca,
328 b_begin, incb );
329 }
330 }
331}

References bl1_is_row_storage(), bl1_is_upper(), bl1_scopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_sscopymrt()

void bl1_sscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
437{
438 float* a_begin;
439 float* b_begin;
440 int lda, inca;
441 int ldb, incb;
442 int n_iter;
443 int n_elem;
444 int n_elem_max;
446 int j;
448
449 // Return early if possible.
450 if ( bl1_zero_dim2( m, n ) ) return;
451
452 // Initialize variables based on storage format of B and value of uplo.
453 if ( bl1_is_col_storage( b_rs, b_cs ) )
454 {
455 if ( bl1_is_lower( uplo ) )
456 {
457 n_iter = bl1_min( m, n );
458 n_elem_max = m;
459 lda = a_cs;
460 inca = a_rs;
461 ldb = b_cs;
462 incb = b_rs;
464 }
465 else // if ( bl1_is_upper( uplo ) )
466 {
467 n_iter = n;
468 n_elem_max = bl1_min( m, n );
469 lda = a_cs;
470 inca = a_rs;
471 ldb = b_cs;
472 incb = b_rs;
474 }
475 }
476 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
477 {
478 if ( bl1_is_lower( uplo ) )
479 {
480 n_iter = m;
481 n_elem_max = bl1_min( m, n );
482 lda = a_rs;
483 inca = a_cs;
484 ldb = b_rs;
485 incb = b_cs;
487 }
488 else // if ( bl1_is_upper( uplo ) )
489 {
490 n_iter = bl1_min( m, n );
491 n_elem_max = n;
492 lda = a_rs;
493 inca = a_cs;
494 ldb = b_rs;
495 incb = b_cs;
497 }
498 }
499
500 // Swap lda and inca if we're doing a transpose.
501 if ( bl1_does_trans( trans ) )
502 {
504 }
505
506 // Extract conj component from trans parameter.
508
509 // Choose the loop based on whether n_elem will be shrinking or growing
510 // with each iteration.
512 {
513 for ( j = 0; j < n_iter; j++ )
514 {
515 n_elem = n_elem_max - j;
516 a_begin = a + j*lda + j*inca;
517 b_begin = b + j*ldb + j*incb;
518
520 n_elem,
521 a_begin, inca,
522 b_begin, incb );
523 }
524 }
525 else // if ( n_elem_is_ascending )
526 {
527 for ( j = 0; j < n_iter; j++ )
528 {
529 n_elem = bl1_min( j + 1, n_elem_max );
530 a_begin = a + j*lda;
531 b_begin = b + j*ldb;
532
534 n_elem,
535 a_begin, inca,
536 b_begin, incb );
537 }
538 }
539}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_scopyv(), and bl1_zero_dim2().

◆ bl1_sscopymt()

void bl1_sscopymt ( trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
361{
362 float* a_begin;
363 float* b_begin;
364 int lda, inca;
365 int ldb, incb;
366 int n_iter;
367 int n_elem;
368 int j;
370
371 // Return early if possible.
372 if ( bl1_zero_dim2( m, n ) ) return;
373
374 // Handle cases where A and B are vectors to ensure that the underlying copy
375 // gets invoked only once.
376 if ( bl1_is_vector( m, n ) )
377 {
378 // Initialize with values appropriate for vectors.
379 n_iter = 1;
380 n_elem = bl1_vector_dim( m, n );
381 lda = 1; // multiplied by zero when n_iter == 1; not needed.
382 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
383 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
385 }
386 else // matrix case
387 {
388 // Initialize with optimal values for column-major storage of B.
389 n_iter = n;
390 n_elem = m;
391 lda = a_cs;
392 inca = a_rs;
393 ldb = b_cs;
394 incb = b_rs;
395
396 // Handle the transposition of A.
397 if ( bl1_does_trans( trans ) )
398 {
400 }
401
402 // An optimization: if B is row-major, then let's access the matrix by rows
403 // instead of by columns for increased spatial locality.
404 if ( bl1_is_row_storage( b_rs, b_cs ) )
405 {
409 }
410 }
411
412 // Extract conj component from trans parameter.
414
415 for ( j = 0; j < n_iter; ++j )
416 {
417 a_begin = a + j*lda;
418 b_begin = b + j*ldb;
419
421 n_elem,
422 a_begin, inca,
423 b_begin, incb );
424 }
425}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_scopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

◆ bl1_sswap()

void bl1_sswap ( int  n,
float * x,
int  incx,
float * y,
int  incy 
)
14{
15#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16 cblas_sswap( n,
17 x, incx,
18 y, incy );
19#else
20 F77_sswap( &n,
21 x, &incx,
22 y, &incy );
23#endif
24}
void F77_sswap(int *n, float *x, int *incx, float *y, int *incy)
void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY)

References cblas_sswap(), and F77_sswap().

Referenced by bl1_sswapmt(), bl1_sswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_sswapmt()

void bl1_sswapmt ( trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 int lda, inca;
18 int ldb, incb;
19 int n_iter;
20 int n_elem;
21 int j;
22
23 // Return early if possible.
24 if ( bl1_zero_dim2( m, n ) ) return;
25
26 // Handle cases where A and B are vectors to ensure that the underlying swap
27 // gets invoked only once.
28 if ( bl1_is_vector( m, n ) )
29 {
30 // Initialize with values appropriate for vectors.
31 n_iter = 1;
32 n_elem = bl1_vector_dim( m, n );
33 lda = 1; // multiplied by zero when n_iter == 1; not needed.
34 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
37 }
38 else // matrix case
39 {
40 // Initialize with optimal values for column-major storage.
41 n_iter = n;
42 n_elem = m;
43 lda = a_cs;
44 inca = a_rs;
45 ldb = b_cs;
46 incb = b_rs;
47
48 // Handle the transposition of A.
49 if ( bl1_does_trans( trans ) )
50 {
52 }
53
54 // An optimization: if B is row-major and if A is effectively row-major
55 // after a possible transposition, then let's access the matrix by rows
56 // instead of by columns for increased spatial locality.
58 {
59 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
61 {
65 }
66 }
67 }
68
69 for ( j = 0; j < n_iter; j++ )
70 {
71 a_begin = a + j*lda;
72 b_begin = b + j*ldb;
73
75 a_begin, inca,
76 b_begin, incb );
77 }
78}
void bl1_sswap(int n, float *x, int incx, float *y, int incy)
Definition bl1_swap.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_sswap(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_sswapv()

void bl1_sswapv ( int  n,
float * x,
int  incx,
float * y,
int  incy 
)
14{
15 // Return early if possible.
16 if ( bl1_zero_dim1( n ) ) return;
17
18 bl1_sswap( n,
19 x, incx,
20 y, incy );
21}

References bl1_sswap(), and bl1_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_ops(), and FLA_Sort_bsvd_ext_f_ops().

◆ bl1_szcopymr()

void bl1_szcopymr ( uplo1_t  uplo,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
595{
596 float* a_begin;
598 int lda, inca;
599 int ldb, incb;
600 int n_iter;
601 int n_elem_max;
602 int n_elem;
603 int j;
604
605 // Return early if possible.
606 if ( bl1_zero_dim2( m, n ) ) return;
607
608 // We initialize for column-major.
609 n_iter = n;
610 n_elem_max = m;
611 lda = a_cs;
612 inca = a_rs;
613 ldb = b_cs;
614 incb = b_rs;
615
616 // An optimization: if B is row-major, then let's access the matrix
617 // by rows instead of by columns for increased spatial locality.
618 if ( bl1_is_row_storage( b_rs, b_cs ) )
619 {
623 bl1_toggle_uplo( uplo );
624 }
625
626
627 if ( bl1_is_upper( uplo ) )
628 {
629 for ( j = 0; j < n_iter; j++ )
630 {
631 n_elem = bl1_min( j + 1, n_elem_max );
632 a_begin = a + j*lda;
633 b_begin = b + j*ldb;
634
636 n_elem,
637 a_begin, inca,
638 b_begin, incb );
639 }
640 }
641 else // if ( bl1_is_lower( uplo ) )
642 {
643 for ( j = 0; j < n_iter; j++ )
644 {
645 n_elem = bl1_max( 0, n_elem_max - j );
646 a_begin = a + j*lda + j*inca;
647 b_begin = b + j*ldb + j*incb;
648
649 if ( n_elem <= 0 ) break;
650
652 n_elem,
653 a_begin, inca,
654 b_begin, incb );
655 }
656 }
657}
void bl1_szcopyv(conj1_t conj, int m, float *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:169

References bl1_is_row_storage(), bl1_is_upper(), bl1_szcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_szcopymrt()

void bl1_szcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
755{
756 float* a_begin;
758 int lda, inca;
759 int ldb, incb;
760 int n_iter;
761 int n_elem;
762 int n_elem_max;
764 int j;
766
767 // Return early if possible.
768 if ( bl1_zero_dim2( m, n ) ) return;
769
770 // Initialize variables based on storage format of B and value of uplo.
771 if ( bl1_is_col_storage( b_rs, b_cs ) )
772 {
773 if ( bl1_is_lower( uplo ) )
774 {
775 n_iter = bl1_min( m, n );
776 n_elem_max = m;
777 lda = a_cs;
778 inca = a_rs;
779 ldb = b_cs;
780 incb = b_rs;
782 }
783 else // if ( bl1_is_upper( uplo ) )
784 {
785 n_iter = n;
786 n_elem_max = bl1_min( m, n );
787 lda = a_cs;
788 inca = a_rs;
789 ldb = b_cs;
790 incb = b_rs;
792 }
793 }
794 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
795 {
796 if ( bl1_is_lower( uplo ) )
797 {
798 n_iter = m;
799 n_elem_max = bl1_min( m, n );
800 lda = a_rs;
801 inca = a_cs;
802 ldb = b_rs;
803 incb = b_cs;
805 }
806 else // if ( bl1_is_upper( uplo ) )
807 {
808 n_iter = bl1_min( m, n );
809 n_elem_max = n;
810 lda = a_rs;
811 inca = a_cs;
812 ldb = b_rs;
813 incb = b_cs;
815 }
816 }
817
818 // Swap lda and inca if we're doing a transpose.
819 if ( bl1_does_trans( trans ) )
820 {
822 }
823
824 // Extract conj component from trans parameter.
826
827 // Choose the loop based on whether n_elem will be shrinking or growing
828 // with each iteration.
830 {
831 for ( j = 0; j < n_iter; j++ )
832 {
833 n_elem = n_elem_max - j;
834 a_begin = a + j*lda + j*inca;
835 b_begin = b + j*ldb + j*incb;
836
838 n_elem,
839 a_begin, inca,
840 b_begin, incb );
841 }
842 }
843 else // if ( n_elem_is_ascending )
844 {
845 for ( j = 0; j < n_iter; j++ )
846 {
847 n_elem = bl1_min( j + 1, n_elem_max );
848 a_begin = a + j*lda;
849 b_begin = b + j*ldb;
850
852 n_elem,
853 a_begin, inca,
854 b_begin, incb );
855 }
856 }
857}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_szcopymt()

void bl1_szcopymt ( trans1_t  trans,
int  m,
int  n,
float * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
697{
698 float* a_begin;
700 int lda, inca;
701 int ldb, incb;
702 int n_iter;
703 int n_elem;
704 int j;
706
707 // Return early if possible.
708 if ( bl1_zero_dim2( m, n ) ) return;
709
710 // Handle cases where A and B are vectors to ensure that the underlying copy
711 // gets invoked only once.
712 if ( bl1_is_vector( m, n ) )
713 {
714 // Initialize with values appropriate for vectors.
715 n_iter = 1;
716 n_elem = bl1_vector_dim( m, n );
717 lda = 1; // multiplied by zero when n_iter == 1; not needed.
718 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
719 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
721 }
722 else // matrix case
723 {
724 // Initialize with optimal values for column-major storage of B.
725 n_iter = n;
726 n_elem = m;
727 lda = a_cs;
728 inca = a_rs;
729 ldb = b_cs;
730 incb = b_rs;
731
732 // Handle the transposition of A.
733 if ( bl1_does_trans( trans ) )
734 {
736 }
737
738 // An optimization: if B is row-major, then let's access the matrix by rows
739 // instead of by columns for increased spatial locality.
740 if ( bl1_is_row_storage( b_rs, b_cs ) )
741 {
745 }
746 }
747
748 // Extract conj component from trans parameter.
750
751 for ( j = 0; j < n_iter; ++j )
752 {
753 a_begin = a + j*lda;
754 b_begin = b + j*ldb;
755
757 n_elem,
758 a_begin, inca,
759 b_begin, incb );
760 }
761}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_szcopyv()

void bl1_szcopyv ( conj1_t  conj,
int  m,
float * x,
int  incx,
dcomplex * y,
int  incy 
)
170{
171 float* chi;
172 dcomplex* psi;
173 int i;
174
175 // Return early if possible.
176 if ( bl1_zero_dim1( m ) ) return;
177
178 // Initialize pointers.
179 chi = x;
180 psi = y;
181
182 for ( i = 0; i < m; ++i )
183 {
184 psi->real = *chi;
185 psi->imag = 0.0;
186
187 chi += incx;
188 psi += incy;
189 }
190}

References bl1_zero_dim1(), i, dcomplex::imag, and dcomplex::real.

Referenced by bl1_szcopymr(), bl1_szcopymrt(), and bl1_szcopymt().

◆ bl1_zamax()

void bl1_zamax ( int  n,
dcomplex * x,
int  incx,
int * index 
)
47{
48#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
49 *index = cblas_izamax( n,
50 x, incx );
51#else
52 *index = F77_izamax( &n,
53 x, &incx ) - 1;
54#endif
55}
int F77_izamax(int *n, dcomplex *x, int *incx)
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX)

References cblas_izamax(), and F77_izamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), and FLA_SA_LU_unb().

◆ bl1_zasum()

void bl1_zasum ( int  n,
dcomplex * x,
int  incx,
double * norm 
)
47{
48#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
49 *norm = cblas_dzasum( n,
50 x, incx );
51#else
52 *norm = F77_dzasum( &n,
53 x, &incx );
54#endif
55}
double F77_dzasum(int *n, dcomplex *x, int *incx)
double cblas_dzasum(const int N, const void *X, const int incX)

References cblas_dzasum(), and F77_dzasum().

Referenced by FLA_Asum_external().

◆ bl1_zaxpy()

void bl1_zaxpy ( int  n,
dcomplex * alpha,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy 
)
59{
60#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
61 cblas_zaxpy( n,
62 alpha,
63 x, incx,
64 y, incy );
65#else
66 F77_zaxpy( &n,
67 alpha,
68 x, &incx,
69 y, &incy );
70#endif
71}
void F77_zaxpy(int *n, dcomplex *alpha, dcomplex *x, int *incx, dcomplex *y, int *incy)
void cblas_zaxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY)

References cblas_zaxpy(), and F77_zaxpy().

Referenced by bl1_zaxpymt(), bl1_zaxpysmt(), bl1_zaxpysv(), and bl1_zaxpyv().

◆ bl1_zaxpymrt()

void bl1_zaxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
335{
338 int lda, inca;
339 int ldb, incb;
340 int n_iter;
341 int n_elem;
342 int n_elem_max;
344 int j;
346
347 // Return early if possible.
348 if ( bl1_zero_dim2( m, n ) ) return;
349
350 // Initialize variables based on storage format of B and value of uplo.
351 if ( bl1_is_col_storage( b_rs, b_cs ) )
352 {
353 if ( bl1_is_lower( uplo ) )
354 {
355 n_iter = bl1_min( m, n );
356 n_elem_max = m;
357 lda = a_cs;
358 inca = a_rs;
359 ldb = b_cs;
360 incb = b_rs;
362 }
363 else // if ( bl1_is_upper( uplo ) )
364 {
365 n_iter = n;
366 n_elem_max = bl1_min( m, n );
367 lda = a_cs;
368 inca = a_rs;
369 ldb = b_cs;
370 incb = b_rs;
372 }
373 }
374 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
375 {
376 if ( bl1_is_lower( uplo ) )
377 {
378 n_iter = m;
379 n_elem_max = bl1_min( m, n );
380 lda = a_rs;
381 inca = a_cs;
382 ldb = b_rs;
383 incb = b_cs;
385 }
386 else // if ( bl1_is_upper( uplo ) )
387 {
388 n_iter = bl1_min( m, n );
389 n_elem_max = n;
390 lda = a_rs;
391 inca = a_cs;
392 ldb = b_rs;
393 incb = b_cs;
395 }
396 }
397
398 // Swap lda and inca if we're doing a transpose.
399 if ( bl1_does_trans( trans ) )
400 {
402 }
403
404 // Extract conj component from trans parameter.
406
407 // Choose the loop based on whether n_elem will be shrinking or growing
408 // with each iteration.
410 {
411 for ( j = 0; j < n_iter; j++ )
412 {
413 n_elem = n_elem_max - j;
414 a_begin = a + j*lda + j*inca;
415 b_begin = b + j*ldb + j*incb;
416
418 n_elem,
419 alpha,
420 a_begin, inca,
421 b_begin, incb );
422 }
423 }
424 else // if ( n_elem_is_ascending )
425 {
426 for ( j = 0; j < n_iter; j++ )
427 {
428 n_elem = bl1_min( j + 1, n_elem_max );
429 a_begin = a + j*lda;
430 b_begin = b + j*ldb;
431
433 n_elem,
434 alpha,
435 a_begin, inca,
436 b_begin, incb );
437 }
438 }
439}
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zaxpyv(), and bl1_zero_dim2().

Referenced by bl1_zher2k(), bl1_zherk(), and FLA_Axpyrt_external().

◆ bl1_zaxpymt()

void bl1_zaxpymt ( trans1_t  trans,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
249{
253 int inca_temp;
254 int lda, inca;
255 int ldb, incb;
256 int n_iter;
257 int n_elem;
258 int j;
259
260 // Return early if possible.
261 if ( bl1_zero_dim2( m, n ) ) return;
262
263 // Handle cases where A and B are vectors to ensure that the underlying axpy
264 // gets invoked only once.
265 if ( bl1_is_vector( m, n ) )
266 {
267 // Initialize with values appropriate for vectors.
268 n_iter = 1;
269 n_elem = bl1_vector_dim( m, n );
270 lda = 1; // multiplied by zero when n_iter == 1; not needed.
271 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
272 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
274 }
275 else // matrix case
276 {
277 // Initialize with optimal values for column-major storage.
278 n_iter = n;
279 n_elem = m;
280 lda = a_cs;
281 inca = a_rs;
282 ldb = b_cs;
283 incb = b_rs;
284
285 // Handle the transposition of A.
286 if ( bl1_does_trans( trans ) )
287 {
289 }
290
291 // An optimization: if B is row-major and if A is effectively row-major
292 // after a possible transposition, then let's access the matrices by rows
293 // instead of by columns for increased spatial locality.
294 if ( bl1_is_row_storage( b_rs, b_cs ) )
295 {
296 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
298 {
302 }
303 }
304 }
305
306 if ( bl1_does_conj( trans ) )
307 {
309
311 inca_temp = 1;
312
313 for ( j = 0; j < n_iter; j++ )
314 {
315 a_begin = a + j*lda;
316 b_begin = b + j*ldb;
317
319 n_elem,
320 a_begin, inca,
321 a_temp, inca_temp );
322
324 alpha,
326 b_begin, incb );
327 }
328
329 bl1_zfree( a_temp );
330 }
331 else // if ( !bl1_does_conj( trans ) )
332 {
333 for ( j = 0; j < n_iter; j++ )
334 {
335 a_begin = a + j*lda;
336 b_begin = b + j*ldb;
337
339 alpha,
340 a_begin, inca,
341 b_begin, incb );
342 }
343
344 }
345}
void bl1_zaxpy(int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpy.c:58
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
dcomplex * bl1_zallocv(unsigned int n_elem)
Definition bl1_allocv.c:45
void bl1_zfree(dcomplex *p)
Definition bl1_free.c:45

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_zaxpysmt()

void bl1_zaxpysmt ( trans1_t  trans,
int  m,
int  n,
dcomplex * alpha0,
dcomplex * alpha1,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * beta,
dcomplex * b,
int  b_rs,
int  b_cs 
)
274{
279 int inca_temp;
280 int lda, inca;
281 int ldb, incb;
282 int n_iter;
283 int n_elem;
284 int j;
285
286 // Return early if possible.
287 if ( bl1_zero_dim2( m, n ) ) return;
288
289 alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
290 alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
291
292 // Handle cases where A and B are vectors to ensure that the underlying axpy
293 // gets invoked only once.
294 if ( bl1_is_vector( m, n ) )
295 {
296 // Initialize with values appropriate for vectors.
297 n_iter = 1;
298 n_elem = bl1_vector_dim( m, n );
299 lda = 1; // multiplied by zero when n_iter == 1; not needed.
300 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
301 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
303 }
304 else // matrix case
305 {
306 // Initialize with optimal values for column-major storage.
307 n_iter = n;
308 n_elem = m;
309 lda = a_cs;
310 inca = a_rs;
311 ldb = b_cs;
312 incb = b_rs;
313
314 // Handle the transposition of A.
315 if ( bl1_does_trans( trans ) )
316 {
318 }
319
320 // An optimization: if B is row-major and if A is effectively row-major
321 // after a possible transposition, then let's access the matrices by rows
322 // instead of by columns for increased spatial locality.
323 if ( bl1_is_row_storage( b_rs, b_cs ) )
324 {
325 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
327 {
331 }
332 }
333 }
334
335 if ( bl1_does_conj( trans ) )
336 {
338
340 inca_temp = 1;
341
342 for ( j = 0; j < n_iter; j++ )
343 {
344 a_begin = a + j*lda;
345 b_begin = b + j*ldb;
346
348 n_elem,
349 a_begin, inca,
350 a_temp, inca_temp );
351
353 beta,
354 b_begin, incb );
355
357 &alpha_prod,
359 b_begin, incb );
360 }
361
362 bl1_zfree( a_temp );
363 }
364 else // if ( !bl1_does_conj( trans ) )
365 {
366 for ( j = 0; j < n_iter; j++ )
367 {
368 a_begin = a + j*lda;
369 b_begin = b + j*ldb;
370
372 beta,
373 b_begin, incb );
374
376 &alpha_prod,
377 a_begin, inca,
378 b_begin, incb );
379 }
380 }
381}
void bl1_zscal(int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scal.c:78

References alpha1, bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), bl1_zscal(), BLIS1_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Axpys_external().

◆ bl1_zaxpysv()

void bl1_zaxpysv ( int  n,
dcomplex * alpha0,
dcomplex * alpha1,
dcomplex * x,
int  incx,
dcomplex * beta,
dcomplex * y,
int  incy 
)
72{
74
75 // Return early if possible.
76 if ( bl1_zero_dim1( n ) ) return;
77
78 alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
79 alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
80
81 bl1_zscal( n,
82 beta,
83 y, incy );
84
85 bl1_zaxpy( n,
87 x, incx,
88 y, incy );
89}

References alpha1, bl1_zaxpy(), bl1_zero_dim1(), bl1_zscal(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().

◆ bl1_zaxpyv()

void bl1_zaxpyv ( conj1_t  conj,
int  n,
dcomplex * alpha,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy 
)
61{
63 int incx_copy;
64
65 // Return early if possible.
66 if ( bl1_zero_dim1( n ) ) return;
67
68 x_copy = x;
70
71 if ( bl1_is_conj( conj ) )
72 {
73 x_copy = bl1_zallocv( n );
74 incx_copy = 1;
75
77 n,
78 x, incx,
80 }
81
82 bl1_zaxpy( n,
83 alpha,
85 y, incy );
86
87 if ( bl1_is_conj( conj ) )
89}

References bl1_is_conj(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim1(), and bl1_zfree().

Referenced by bl1_zaxpymrt(), bl1_zgemv(), bl1_zhemv(), bl1_ztrmvsx(), bl1_ztrsvsx(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().

◆ bl1_zccopymr()

void bl1_zccopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
1181{
1184 int lda, inca;
1185 int ldb, incb;
1186 int n_iter;
1187 int n_elem_max;
1188 int n_elem;
1189 int j;
1190
1191 // Return early if possible.
1192 if ( bl1_zero_dim2( m, n ) ) return;
1193
1194 // We initialize for column-major.
1195 n_iter = n;
1196 n_elem_max = m;
1197 lda = a_cs;
1198 inca = a_rs;
1199 ldb = b_cs;
1200 incb = b_rs;
1201
1202 // An optimization: if B is row-major, then let's access the matrix
1203 // by rows instead of by columns for increased spatial locality.
1204 if ( bl1_is_row_storage( b_rs, b_cs ) )
1205 {
1209 bl1_toggle_uplo( uplo );
1210 }
1211
1212
1213 if ( bl1_is_upper( uplo ) )
1214 {
1215 for ( j = 0; j < n_iter; j++ )
1216 {
1217 n_elem = bl1_min( j + 1, n_elem_max );
1218 a_begin = a + j*lda;
1219 b_begin = b + j*ldb;
1220
1222 n_elem,
1223 a_begin, inca,
1224 b_begin, incb );
1225 }
1226 }
1227 else // if ( bl1_is_lower( uplo ) )
1228 {
1229 for ( j = 0; j < n_iter; j++ )
1230 {
1231 n_elem = bl1_max( 0, n_elem_max - j );
1232 a_begin = a + j*lda + j*inca;
1233 b_begin = b + j*ldb + j*incb;
1234
1235 if ( n_elem <= 0 ) break;
1236
1238 n_elem,
1239 a_begin, inca,
1240 b_begin, incb );
1241 }
1242 }
1243}
void bl1_zccopyv(conj1_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:330

References bl1_is_row_storage(), bl1_is_upper(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_zccopymrt()

void bl1_zccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
1921{
1924 int lda, inca;
1925 int ldb, incb;
1926 int n_iter;
1927 int n_elem;
1928 int n_elem_max;
1930 int j;
1931 conj1_t conj;
1932
1933 // Return early if possible.
1934 if ( bl1_zero_dim2( m, n ) ) return;
1935
1936 // Initialize variables based on storage format of B and value of uplo.
1937 if ( bl1_is_col_storage( b_rs, b_cs ) )
1938 {
1939 if ( bl1_is_lower( uplo ) )
1940 {
1941 n_iter = bl1_min( m, n );
1942 n_elem_max = m;
1943 lda = a_cs;
1944 inca = a_rs;
1945 ldb = b_cs;
1946 incb = b_rs;
1948 }
1949 else // if ( bl1_is_upper( uplo ) )
1950 {
1951 n_iter = n;
1952 n_elem_max = bl1_min( m, n );
1953 lda = a_cs;
1954 inca = a_rs;
1955 ldb = b_cs;
1956 incb = b_rs;
1958 }
1959 }
1960 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1961 {
1962 if ( bl1_is_lower( uplo ) )
1963 {
1964 n_iter = m;
1965 n_elem_max = bl1_min( m, n );
1966 lda = a_rs;
1967 inca = a_cs;
1968 ldb = b_rs;
1969 incb = b_cs;
1971 }
1972 else // if ( bl1_is_upper( uplo ) )
1973 {
1974 n_iter = bl1_min( m, n );
1975 n_elem_max = n;
1976 lda = a_rs;
1977 inca = a_cs;
1978 ldb = b_rs;
1979 incb = b_cs;
1981 }
1982 }
1983
1984 // Swap lda and inca if we're doing a transpose.
1985 if ( bl1_does_trans( trans ) )
1986 {
1988 }
1989
1990 // Extract conj component from trans parameter.
1992
1993 // Choose the loop based on whether n_elem will be shrinking or growing
1994 // with each iteration.
1996 {
1997 for ( j = 0; j < n_iter; j++ )
1998 {
1999 n_elem = n_elem_max - j;
2000 a_begin = a + j*lda + j*inca;
2001 b_begin = b + j*ldb + j*incb;
2002
2004 n_elem,
2005 a_begin, inca,
2006 b_begin, incb );
2007 }
2008 }
2009 else // if ( n_elem_is_ascending )
2010 {
2011 for ( j = 0; j < n_iter; j++ )
2012 {
2013 n_elem = bl1_min( j + 1, n_elem_max );
2014 a_begin = a + j*lda;
2015 b_begin = b + j*ldb;
2016
2018 n_elem,
2019 a_begin, inca,
2020 b_begin, incb );
2021 }
2022 }
2023}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zccopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_zccopymt()

void bl1_zccopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
1301{
1304 int lda, inca;
1305 int ldb, incb;
1306 int n_iter;
1307 int n_elem;
1308 int j;
1309 conj1_t conj;
1310
1311 // Return early if possible.
1312 if ( bl1_zero_dim2( m, n ) ) return;
1313
1314 // Handle cases where A and B are vectors to ensure that the underlying copy
1315 // gets invoked only once.
1316 if ( bl1_is_vector( m, n ) )
1317 {
1318 // Initialize with values appropriate for vectors.
1319 n_iter = 1;
1320 n_elem = bl1_vector_dim( m, n );
1321 lda = 1; // multiplied by zero when n_iter == 1; not needed.
1322 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1323 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1325 }
1326 else // matrix case
1327 {
1328 // Initialize with optimal values for column-major storage of B.
1329 n_iter = n;
1330 n_elem = m;
1331 lda = a_cs;
1332 inca = a_rs;
1333 ldb = b_cs;
1334 incb = b_rs;
1335
1336 // Handle the transposition of A.
1337 if ( bl1_does_trans( trans ) )
1338 {
1340 }
1341
1342 // An optimization: if B is row-major, then let's access the matrix by rows
1343 // instead of by columns for increased spatial locality.
1344 if ( bl1_is_row_storage( b_rs, b_cs ) )
1345 {
1349 }
1350 }
1351
1352 // Extract conj component from trans parameter.
1354
1355 for ( j = 0; j < n_iter; ++j )
1356 {
1357 a_begin = a + j*lda;
1358 b_begin = b + j*ldb;
1359
1361 n_elem,
1362 a_begin, inca,
1363 b_begin, incb );
1364 }
1365}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_zccopyv()

void bl1_zccopyv ( conj1_t  conj,
int  m,
dcomplex * x,
int  incx,
scomplex * y,
int  incy 
)
331{
332 dcomplex* chi;
333 scomplex* psi;
334 int i;
335
336 // Return early if possible.
337 if ( bl1_zero_dim1( m ) ) return;
338
339 // Initialize pointers.
340 chi = x;
341 psi = y;
342
343 for ( i = 0; i < m; ++i )
344 {
345 psi->real = chi->real;
346 psi->imag = chi->imag;
347
348 chi += incx;
349 psi += incy;
350 }
351
352 if ( bl1_is_conj( conj ) )
353 bl1_cconjv( m,
354 y, incy );
355}

References bl1_cconjv(), bl1_is_conj(), bl1_zero_dim1(), i, scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.

Referenced by bl1_zccopymr(), bl1_zccopymrt(), and bl1_zccopymt().

◆ bl1_zconjm()

void bl1_zconjm ( int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs 
)
73{
74 double m1 = bl1_dm1();
75 double* a_conj;
76 int lda, inca;
77 int n_iter;
78 int n_elem;
79 int j;
80
81 // Return early if possible.
82 if ( bl1_zero_dim2( m, n ) ) return;
83
84 // Handle cases where A is a vector to ensure that the underlying axpy
85 // gets invoked only once.
86 if ( bl1_is_vector( m, n ) )
87 {
88 // Initialize with values appropriate for a vector.
89 n_iter = 1;
90 n_elem = bl1_vector_dim( m, n );
91 lda = 1; // multiplied by zero when n_iter == 1; not needed.
93 }
94 else // matrix case
95 {
96 // Initialize with optimal values for column-major storage.
97 n_iter = n;
98 n_elem = m;
99 lda = a_cs;
100 inca = a_rs;
101
102 // An optimization: if A is row-major, then let's access the matrix
103 // by rows instead of by columns to increase spatial locality.
104 if ( bl1_is_row_storage( a_rs, a_cs ) )
105 {
108 }
109 }
110
111 for ( j = 0; j < n_iter; ++j )
112 {
113 a_conj = ( double* )( a + j*lda ) + 1;
114
116 &m1,
117 a_conj, 2*inca );
118 }
119}
double bl1_dm1(void)
Definition bl1_constants.c:182

References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), and FLA_Conjugate().

◆ bl1_zconjmr()

void bl1_zconjmr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs 
)
80{
81 double m1 = bl1_dm1();
82 double* a_conj;
83 int lda, inca;
84 int n_iter;
85 int n_elem_max;
86 int n_elem;
87 int j;
88
89 // Return early if possible.
90 if ( bl1_zero_dim2( m, n ) ) return;
91
92 // We initialize for column-major.
93 n_iter = n;
94 n_elem_max = m;
95 lda = a_cs;
96 inca = a_rs;
97
98 // An optimization: if A is row-major, then let's access the matrix
99 // by rows instead of by columns to increase spatial locality.
100 if ( bl1_is_row_storage( a_rs, a_cs ) )
101 {
104 bl1_toggle_uplo( uplo );
105 }
106
107 if ( bl1_is_upper( uplo ) )
108 {
109 for ( j = 0; j < n_iter; ++j )
110 {
111 n_elem = bl1_min( j + 1, n_elem_max );
112 a_conj = ( double* )( a + j*lda ) + 1;
113
115 &m1,
116 a_conj, 2*inca );
117 }
118 }
119 else // if ( bl1_is_lower( uplo ) )
120 {
121 for ( j = 0; j < n_iter; ++j )
122 {
123 n_elem = bl1_max( 0, n_elem_max - j );
124 a_conj = ( double* )( a + j*lda + j*inca ) + 1;
125
126 if ( n_elem <= 0 ) break;
127
129 &m1,
130 a_conj, 2*inca );
131 }
132 }
133}

References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), and FLA_Conjugate_r().

◆ bl1_zconjv()

void bl1_zconjv ( int  m,
dcomplex * x,
int  incx 
)

◆ bl1_zcopy()

void bl1_zcopy ( int  m,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy 
)
53{
54#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
55 cblas_zcopy( m,
56 x, incx,
57 y, incy );
58#else
59 F77_zcopy( &m,
60 x, &incx,
61 y, &incy );
62#endif
63}
void F77_zcopy(int *n, dcomplex *x, int *incx, dcomplex *y, int *incy)
void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY)

References cblas_zcopy(), and F77_zcopy().

Referenced by bl1_zcopymr(), bl1_zcopymt(), bl1_zcopyv(), and FLA_SA_LU_unb().

◆ bl1_zcopymr()

void bl1_zcopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
203{
206 int lda, inca;
207 int ldb, incb;
208 int n_iter;
209 int n_elem_max;
210 int n_elem;
211 int j;
212
213 // Return early if possible.
214 if ( bl1_zero_dim2( m, n ) ) return;
215
216 // We initialize for column-major.
217 n_iter = n;
218 n_elem_max = m;
219 lda = a_cs;
220 inca = a_rs;
221 ldb = b_cs;
222 incb = b_rs;
223
224 // An optimization: if A and B are both row-major, then let's access the
225 // matrices by rows instead of by columns for increased spatial locality.
227 {
231 bl1_toggle_uplo( uplo );
232 }
233
234
235 if ( bl1_is_upper( uplo ) )
236 {
237 for ( j = 0; j < n_iter; j++ )
238 {
239 n_elem = bl1_min( j + 1, n_elem_max );
240 a_begin = a + j*lda;
241 b_begin = b + j*ldb;
242
244 a_begin, inca,
245 b_begin, incb );
246 }
247 }
248 else // if ( bl1_is_lower( uplo ) )
249 {
250 for ( j = 0; j < n_iter; j++ )
251 {
252 n_elem = bl1_max( 0, n_elem_max - j );
253 a_begin = a + j*lda + j*inca;
254 b_begin = b + j*ldb + j*incb;
255
256 if ( n_elem <= 0 ) break;
257
259 a_begin, inca,
260 b_begin, incb );
261 }
262 }
263}
void bl1_zcopy(int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copy.c:52

References bl1_is_row_storage(), bl1_is_upper(), bl1_zcopy(), and bl1_zero_dim2().

Referenced by bl1_zcreate_contigmr(), bl1_zfree_saved_contigmr(), bl1_zfree_saved_contigmsr(), and FLA_Copyr_external().

◆ bl1_zcopymrt()

void bl1_zcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
329{
332 int lda, inca;
333 int ldb, incb;
334 int n_iter;
335 int n_elem;
336 int n_elem_max;
338 int j;
340
341 // Return early if possible.
342 if ( bl1_zero_dim2( m, n ) ) return;
343
344 // Initialize variables based on storage format of B and value of uplo.
345 if ( bl1_is_col_storage( b_rs, b_cs ) )
346 {
347 if ( bl1_is_lower( uplo ) )
348 {
349 n_iter = bl1_min( m, n );
350 n_elem_max = m;
351 lda = a_cs;
352 inca = a_rs;
353 ldb = b_cs;
354 incb = b_rs;
356 }
357 else // if ( bl1_is_upper( uplo ) )
358 {
359 n_iter = n;
360 n_elem_max = bl1_min( m, n );
361 lda = a_cs;
362 inca = a_rs;
363 ldb = b_cs;
364 incb = b_rs;
366 }
367 }
368 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
369 {
370 if ( bl1_is_lower( uplo ) )
371 {
372 n_iter = m;
373 n_elem_max = bl1_min( m, n );
374 lda = a_rs;
375 inca = a_cs;
376 ldb = b_rs;
377 incb = b_cs;
379 }
380 else // if ( bl1_is_upper( uplo ) )
381 {
382 n_iter = bl1_min( m, n );
383 n_elem_max = n;
384 lda = a_rs;
385 inca = a_cs;
386 ldb = b_rs;
387 incb = b_cs;
389 }
390 }
391
392 // Swap lda and inca if we're doing a transpose.
393 if ( bl1_does_trans( trans ) )
394 {
396 }
397
398 // Extract conj component from trans parameter.
400
401 // Choose the loop based on whether n_elem will be shrinking or growing
402 // with each iteration.
404 {
405 for ( j = 0; j < n_iter; j++ )
406 {
407 n_elem = n_elem_max - j;
408 a_begin = a + j*lda + j*inca;
409 b_begin = b + j*ldb + j*incb;
410
412 n_elem,
413 a_begin, inca,
414 b_begin, incb );
415 }
416 }
417 else // if ( n_elem_is_ascending )
418 {
419 for ( j = 0; j < n_iter; j++ )
420 {
421 n_elem = bl1_min( j + 1, n_elem_max );
422 a_begin = a + j*lda;
423 b_begin = b + j*ldb;
424
426 n_elem,
427 a_begin, inca,
428 b_begin, incb );
429 }
430 }
431}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zcopyv(), and bl1_zero_dim2().

Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().

◆ bl1_zcopymt()

void bl1_zcopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
287{
290 int lda, inca;
291 int ldb, incb;
292 int n_iter;
293 int n_elem;
294 int j;
295
296 // Return early if possible.
297 if ( bl1_zero_dim2( m, n ) ) return;
298
299 // Handle cases where A and B are vectors to ensure that the underlying copy
300 // gets invoked only once.
301 if ( bl1_is_vector( m, n ) )
302 {
303 // Initialize with values appropriate for vectors.
304 n_iter = 1;
305 n_elem = bl1_vector_dim( m, n );
306 lda = 1; // multiplied by zero when n_iter == 1; not needed.
307 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
308 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
310 }
311 else // matrix case
312 {
313 // Initialize with optimal values for column-major storage.
314 n_iter = n;
315 n_elem = m;
316 lda = a_cs;
317 inca = a_rs;
318 ldb = b_cs;
319 incb = b_rs;
320
321 // Handle the transposition of A.
322 if ( bl1_does_trans( trans ) )
323 {
325 }
326
327 // An optimization: if B is row-major and if A is effectively row-major
328 // after a possible transposition, then let's access the matrix by rows
329 // instead of by columns for increased spatial locality.
330 if ( bl1_is_row_storage( b_rs, b_cs ) )
331 {
332 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
334 {
338 }
339 }
340 }
341
342 for ( j = 0; j < n_iter; j++ )
343 {
344 a_begin = a + j*lda;
345 b_begin = b + j*ldb;
346
348 a_begin, inca,
349 b_begin, incb );
350
351 if ( bl1_does_conj( trans ) )
353 b_begin, incb );
354 }
355}

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zcopy(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zfree_saved_contigm(), bl1_zgemm(), bl1_zhemm(), bl1_zher2k(), bl1_zsymm(), bl1_zsyr2k(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Bsvd_v_opz_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opz_var2().

◆ bl1_zcopyv()

void bl1_zcopyv ( conj1_t  conj,
int  m,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy 
)

◆ bl1_zdcopymr()

void bl1_zdcopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
985{
987 double* b_begin;
988 int lda, inca;
989 int ldb, incb;
990 int n_iter;
991 int n_elem_max;
992 int n_elem;
993 int j;
994
995 // Return early if possible.
996 if ( bl1_zero_dim2( m, n ) ) return;
997
998 // We initialize for column-major.
999 n_iter = n;
1000 n_elem_max = m;
1001 lda = a_cs;
1002 inca = a_rs;
1003 ldb = b_cs;
1004 incb = b_rs;
1005
1006 // An optimization: if B is row-major, then let's access the matrix
1007 // by rows instead of by columns for increased spatial locality.
1008 if ( bl1_is_row_storage( b_rs, b_cs ) )
1009 {
1013 bl1_toggle_uplo( uplo );
1014 }
1015
1016
1017 if ( bl1_is_upper( uplo ) )
1018 {
1019 for ( j = 0; j < n_iter; j++ )
1020 {
1021 n_elem = bl1_min( j + 1, n_elem_max );
1022 a_begin = a + j*lda;
1023 b_begin = b + j*ldb;
1024
1026 n_elem,
1027 a_begin, inca,
1028 b_begin, incb );
1029 }
1030 }
1031 else // if ( bl1_is_lower( uplo ) )
1032 {
1033 for ( j = 0; j < n_iter; j++ )
1034 {
1035 n_elem = bl1_max( 0, n_elem_max - j );
1036 a_begin = a + j*lda + j*inca;
1037 b_begin = b + j*ldb + j*incb;
1038
1039 if ( n_elem <= 0 ) break;
1040
1042 n_elem,
1043 a_begin, inca,
1044 b_begin, incb );
1045 }
1046 }
1047}
void bl1_zdcopyv(conj1_t conj, int m, dcomplex *x, int incx, double *y, int incy)
Definition bl1_copyv.c:281

References bl1_is_row_storage(), bl1_is_upper(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_zdcopymrt()

void bl1_zdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
1815{
1817 double* b_begin;
1818 int lda, inca;
1819 int ldb, incb;
1820 int n_iter;
1821 int n_elem;
1822 int n_elem_max;
1824 int j;
1825 conj1_t conj;
1826
1827 // Return early if possible.
1828 if ( bl1_zero_dim2( m, n ) ) return;
1829
1830 // Initialize variables based on storage format of B and value of uplo.
1831 if ( bl1_is_col_storage( b_rs, b_cs ) )
1832 {
1833 if ( bl1_is_lower( uplo ) )
1834 {
1835 n_iter = bl1_min( m, n );
1836 n_elem_max = m;
1837 lda = a_cs;
1838 inca = a_rs;
1839 ldb = b_cs;
1840 incb = b_rs;
1842 }
1843 else // if ( bl1_is_upper( uplo ) )
1844 {
1845 n_iter = n;
1846 n_elem_max = bl1_min( m, n );
1847 lda = a_cs;
1848 inca = a_rs;
1849 ldb = b_cs;
1850 incb = b_rs;
1852 }
1853 }
1854 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1855 {
1856 if ( bl1_is_lower( uplo ) )
1857 {
1858 n_iter = m;
1859 n_elem_max = bl1_min( m, n );
1860 lda = a_rs;
1861 inca = a_cs;
1862 ldb = b_rs;
1863 incb = b_cs;
1865 }
1866 else // if ( bl1_is_upper( uplo ) )
1867 {
1868 n_iter = bl1_min( m, n );
1869 n_elem_max = n;
1870 lda = a_rs;
1871 inca = a_cs;
1872 ldb = b_rs;
1873 incb = b_cs;
1875 }
1876 }
1877
1878 // Swap lda and inca if we're doing a transpose.
1879 if ( bl1_does_trans( trans ) )
1880 {
1882 }
1883
1884 // Extract conj component from trans parameter.
1886
1887 // Choose the loop based on whether n_elem will be shrinking or growing
1888 // with each iteration.
1890 {
1891 for ( j = 0; j < n_iter; j++ )
1892 {
1893 n_elem = n_elem_max - j;
1894 a_begin = a + j*lda + j*inca;
1895 b_begin = b + j*ldb + j*incb;
1896
1898 n_elem,
1899 a_begin, inca,
1900 b_begin, incb );
1901 }
1902 }
1903 else // if ( n_elem_is_ascending )
1904 {
1905 for ( j = 0; j < n_iter; j++ )
1906 {
1907 n_elem = bl1_min( j + 1, n_elem_max );
1908 a_begin = a + j*lda;
1909 b_begin = b + j*ldb;
1910
1912 n_elem,
1913 a_begin, inca,
1914 b_begin, incb );
1915 }
1916 }
1917}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zdcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_zdcopymt()

void bl1_zdcopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
1099{
1101 double* b_begin;
1102 int lda, inca;
1103 int ldb, incb;
1104 int n_iter;
1105 int n_elem;
1106 int j;
1107 conj1_t conj;
1108
1109 // Return early if possible.
1110 if ( bl1_zero_dim2( m, n ) ) return;
1111
1112 // Handle cases where A and B are vectors to ensure that the underlying copy
1113 // gets invoked only once.
1114 if ( bl1_is_vector( m, n ) )
1115 {
1116 // Initialize with values appropriate for vectors.
1117 n_iter = 1;
1118 n_elem = bl1_vector_dim( m, n );
1119 lda = 1; // multiplied by zero when n_iter == 1; not needed.
1120 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1121 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1123 }
1124 else // matrix case
1125 {
1126 // Initialize with optimal values for column-major storage of B.
1127 n_iter = n;
1128 n_elem = m;
1129 lda = a_cs;
1130 inca = a_rs;
1131 ldb = b_cs;
1132 incb = b_rs;
1133
1134 // Handle the transposition of A.
1135 if ( bl1_does_trans( trans ) )
1136 {
1138 }
1139
1140 // An optimization: if B is row-major, then let's access the matrix by rows
1141 // instead of by columns for increased spatial locality.
1142 if ( bl1_is_row_storage( b_rs, b_cs ) )
1143 {
1147 }
1148 }
1149
1150 // Extract conj component from trans parameter.
1152
1153 for ( j = 0; j < n_iter; ++j )
1154 {
1155 a_begin = a + j*lda;
1156 b_begin = b + j*ldb;
1157
1159 n_elem,
1160 a_begin, inca,
1161 b_begin, incb );
1162 }
1163}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_zdcopyv()

void bl1_zdcopyv ( conj1_t  conj,
int  m,
dcomplex * x,
int  incx,
double * y,
int  incy 
)
282{
283 dcomplex* chi;
284 double* psi;
285 int i;
286
287 // Return early if possible.
288 if ( bl1_zero_dim1( m ) ) return;
289
290 // Initialize pointers.
291 chi = x;
292 psi = y;
293
294 for ( i = 0; i < m; ++i )
295 {
296 *psi = chi->real;
297
298 chi += incx;
299 psi += incy;
300 }
301}

References bl1_zero_dim1(), i, and dcomplex::real.

Referenced by bl1_zdcopymr(), bl1_zdcopymrt(), and bl1_zdcopymt().

◆ bl1_zdinvscalm()

void bl1_zdinvscalm ( conj1_t  conj,
int  m,
int  n,
double * alpha,
dcomplex * a,
int  a_rs,
int  a_cs 
)
222{
223 double alpha_inv;
225 int lda, inca;
226 int n_iter;
227 int n_elem;
228 int j;
229
230 // Return early if possible.
231 if ( bl1_zero_dim2( m, n ) ) return;
232 if ( bl1_deq1( alpha ) ) return;
233
234 // Handle cases where A is a vector to ensure that the underlying axpy
235 // gets invoked only once.
236 if ( bl1_is_vector( m, n ) )
237 {
238 // Initialize with values appropriate for a vector.
239 n_iter = 1;
240 n_elem = bl1_vector_dim( m, n );
241 lda = 1; // multiplied by zero when n_iter == 1; not needed.
243 }
244 else // matrix case
245 {
246 // Initialize with optimal values for column-major storage.
247 n_iter = n;
248 n_elem = m;
249 lda = a_cs;
250 inca = a_rs;
251
252 // An optimization: if A is row-major, then let's access the matrix
253 // by rows instead of by columns to increase spatial locality.
254 if ( bl1_is_row_storage( a_rs, a_cs ) )
255 {
258 }
259 }
260
262
263 for ( j = 0; j < n_iter; j++ )
264 {
265 a_begin = a + j*lda;
266
268 &alpha_inv,
269 a_begin, inca );
270 }
271}
void bl1_zdscal(int n, double *alpha, dcomplex *x, int incx)
Definition bl1_scal.c:65

References bl1_dinvert2s(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdscal(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_zdinvscalv()

void bl1_zdinvscalv ( conj1_t  conj,
int  n,
double * alpha,
dcomplex * x,
int  incx 
)
66{
67 double alpha_inv;
68
69 if ( bl1_deq1( alpha ) ) return;
70
71 alpha_inv = 1.0 / *alpha;
72
73 bl1_zdscal( n,
74 &alpha_inv,
75 x, incx );
76}

References bl1_zdscal().

◆ bl1_zdot()

void bl1_zdot ( conj1_t  conj,
int  n,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy,
dcomplex * rho 
)
66{
67#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
68 if ( bl1_is_conj( conj ) )
69 {
71 x, incx,
72 y, incy,
73 rho );
74 }
75 else // if ( !bl1_is_conj( conj ) )
76 {
78 x, incx,
79 y, incy,
80 rho );
81 }
82#else
84 n,
85 x, incx,
86 y, incy,
87 rho );
88#endif
89}
void bl1_zdot_in(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:146
void cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc)
void cblas_zdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu)

References bl1_is_conj(), bl1_zdot_in(), cblas_zdotc_sub(), cblas_zdotu_sub(), and rho.

Referenced by bl1_zdot2s(), bl1_zdots(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Sylv_hh_opz_var1(), FLA_Sylv_hn_opz_var1(), FLA_Sylv_nh_opz_var1(), FLA_Sylv_nn_opz_var1(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().

◆ bl1_zdot2s()

void bl1_zdot2s ( conj1_t  conj,
int  n,
dcomplex * alpha,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy,
dcomplex * beta,
dcomplex * rho 
)
71{
78
79 alphac_d.imag *= -1.0;
80
82 n,
83 x, incx,
84 y, incy,
85 &dotxy );
86
88 n,
89 y, incy,
90 x, incx,
91 &dotyx );
92
93 rho->real = beta_d.real * rho_d.real - beta_d.imag * rho_d.imag +
94 alpha_d.real * dotxy.real - alpha_d.imag * dotxy.imag +
95 alphac_d.real * dotyx.real - alphac_d.imag * dotyx.imag;
96 rho->imag = beta_d.real * rho_d.imag + beta_d.imag * rho_d.real +
97 alpha_d.real * dotxy.imag + alpha_d.imag * dotxy.real +
98 alphac_d.real * dotyx.imag + alphac_d.imag * dotyx.real;
99}
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
double imag
Definition blis_type_defs.h:139

References bl1_zdot(), dcomplex::imag, dcomplex::real, and rho.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), and FLA_Lyap_n_opz_var3().

◆ bl1_zdot_in()

void bl1_zdot_in ( conj1_t  conj,
int  n,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy,
dcomplex * rho 
)
147{
148 dcomplex* xip;
149 dcomplex* yip;
150 dcomplex xi;
151 dcomplex yi;
153 int i;
154
155 rho_temp.real = 0.0;
156 rho_temp.imag = 0.0;
157
158 xip = x;
159 yip = y;
160
161 if ( bl1_is_conj( conj ) )
162 {
163 for ( i = 0; i < n; ++i )
164 {
165 xi.real = xip->real;
166 xi.imag = xip->imag;
167 yi.real = yip->real;
168 yi.imag = yip->imag;
169
170 rho_temp.real += xi.real * yi.real - -xi.imag * yi.imag;
171 rho_temp.imag += xi.real * yi.imag + -xi.imag * yi.real;
172
173 xip += incx;
174 yip += incy;
175 }
176 }
177 else // if ( !bl1_is_conj( conj ) )
178 {
179 for ( i = 0; i < n; ++i )
180 {
181 xi.real = xip->real;
182 xi.imag = xip->imag;
183 yi.real = yip->real;
184 yi.imag = yip->imag;
185
186 rho_temp.real += xi.real * yi.real - xi.imag * yi.imag;
187 rho_temp.imag += xi.real * yi.imag + xi.imag * yi.real;
188
189 xip += incx;
190 yip += incy;
191 }
192 }
193
194 rho->real = rho_temp.real;
195 rho->imag = rho_temp.imag;
196}

References bl1_is_conj(), i, dcomplex::imag, dcomplex::real, and rho.

Referenced by bl1_zdot().

◆ bl1_zdots()

void bl1_zdots ( conj1_t  conj,
int  n,
dcomplex * alpha,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy,
dcomplex * beta,
dcomplex * rho 
)

◆ bl1_zdscal()

void bl1_zdscal ( int  n,
double * alpha,
dcomplex * x,
int  incx 
)
66{
67#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
68 cblas_zdscal( n,
69 *alpha,
70 x, incx );
71#else
72 F77_zdscal( &n,
73 alpha,
74 x, &incx );
75#endif
76}
void F77_zdscal(int *n, double *alpha, dcomplex *y, int *incy)
void cblas_zdscal(const int N, const double alpha, void *X, const int incX)

References cblas_zdscal(), and F77_zdscal().

Referenced by bl1_zdinvscalm(), bl1_zdinvscalv(), bl1_zdscalm(), bl1_zdscalmr(), and bl1_zdscalv().

◆ bl1_zdscalm()

void bl1_zdscalm ( conj1_t  conj,
int  m,
int  n,
double * alpha,
dcomplex * a,
int  a_rs,
int  a_cs 
)
222{
223 double alpha_conj;
225 int lda, inca;
226 int n_iter;
227 int n_elem;
228 int j;
229
230 // Return early if possible.
231 if ( bl1_zero_dim2( m, n ) ) return;
232 if ( bl1_deq1( alpha ) ) return;
233
234 // Handle cases where A is a vector to ensure that the underlying axpy
235 // gets invoked only once.
236 if ( bl1_is_vector( m, n ) )
237 {
238 // Initialize with values appropriate for a vector.
239 n_iter = 1;
240 n_elem = bl1_vector_dim( m, n );
241 lda = 1; // multiplied by zero when n_iter == 1; not needed.
243 }
244 else // matrix case
245 {
246 // Initialize with optimal values for column-major storage.
247 n_iter = n;
248 n_elem = m;
249 lda = a_cs;
250 inca = a_rs;
251
252 // An optimization: if A is row-major, then let's access the matrix
253 // by rows instead of by columns to increase spatial locality.
254 if ( bl1_is_row_storage( a_rs, a_cs ) )
255 {
258 }
259 }
260
262
263 for ( j = 0; j < n_iter; j++ )
264 {
265 a_begin = a + j*lda;
266
268 &alpha_conj,
269 a_begin, inca );
270 }
271}

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdscal(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_zdscalmr()

void bl1_zdscalmr ( uplo1_t  uplo,
int  m,
int  n,
double * alpha,
dcomplex * a,
int  a_rs,
int  a_cs 
)
238{
240 int lda, inca;
241 int n_iter;
242 int n_elem_max;
243 int n_elem;
244 int j;
245
246 // Return early if possible.
247 if ( bl1_zero_dim2( m, n ) ) return;
248 if ( bl1_deq1( alpha ) ) return;
249
250 // We initialize for column-major.
251 n_iter = n;
252 n_elem_max = m;
253 lda = a_cs;
254 inca = a_rs;
255
256 // An optimization: if A is row-major, then let's access the matrix
257 // by rows instead of by columns to increase spatial locality.
258 if ( bl1_is_row_storage( a_rs, a_cs ) )
259 {
262 bl1_toggle_uplo( uplo );
263 }
264
265 if ( bl1_is_upper( uplo ) )
266 {
267 for ( j = 0; j < n_iter; j++ )
268 {
269 n_elem = bl1_min( j + 1, n_elem_max );
270 a_begin = a + j*lda;
271
273 alpha,
274 a_begin, inca );
275 }
276 }
277 else // if ( bl1_is_lower( uplo ) )
278 {
279 for ( j = 0; j < n_iter; j++ )
280 {
281 n_elem = bl1_max( 0, n_elem_max - j );
282 a_begin = a + j*lda + j*inca;
283
284 if ( n_elem <= 0 ) break;
285
287 alpha,
288 a_begin, inca );
289 }
290 }
291}

References bl1_is_row_storage(), bl1_is_upper(), bl1_zdscal(), and bl1_zero_dim2().

Referenced by bl1_zher2k(), bl1_zherk(), and FLA_Scalr_external().

◆ bl1_zdscalv()

void bl1_zdscalv ( conj1_t  conj,
int  n,
double * alpha,
dcomplex * x,
int  incx 
)
62{
63 // Return early if possible.
64 if ( bl1_zero_dim1( n ) ) return;
65 if ( bl1_deq1( alpha ) ) return;
66
67 bl1_zdscal( n,
68 alpha,
69 x, incx );
70}

References bl1_zdscal(), and bl1_zero_dim1().

Referenced by bl1_zdapdiagmv(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

◆ bl1_zfnorm()

void bl1_zfnorm ( int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
double * norm 
)
176{
177 dcomplex* a_ij;
178 double sum;
179 int lda, inca;
180 int n_iter;
181 int n_elem;
182 int i, j;
183
184 // Return early if possible.
185 if ( bl1_zero_dim2( m, n ) ) return;
186
187 // Handle cases where A is a vector separately.
188 if ( bl1_is_vector( m, n ) )
189 {
190 // Initialize with values appropriate for vectors.
191 n_iter = 1;
192 n_elem = bl1_vector_dim( m, n );
193 lda = 1; // multiplied by zero when n_iter == 1; not needed.
195 }
196 else // matrix case
197 {
198 // Initialize with optimal values for column-major storage.
199 n_iter = n;
200 n_elem = m;
201 lda = a_cs;
202 inca = a_rs;
203
204 // An optimization: if A is row-major, then let's access the matrix by
205 // rows instead of by columns for increased spatial locality.
206 if ( bl1_is_row_storage( a_rs, a_cs ) )
207 {
210 }
211 }
212
213 // Initialize the accumulator variable.
214 sum = 0.0;
215
216 for ( j = 0; j < n_iter; j++ )
217 {
218 for ( i = 0; i < n_elem; i++ )
219 {
220 a_ij = a + i*inca + j*lda;
221 sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
222 }
223 }
224
225 // Compute the norm and store the result.
226 *norm = sqrt( sum );
227}

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, i, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Norm_frob().

◆ bl1_zinvscalm()

void bl1_zinvscalm ( conj1_t  conj,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs 
)
274{
277 int lda, inca;
278 int n_iter;
279 int n_elem;
280 int j;
281
282 // Return early if possible.
283 if ( bl1_zero_dim2( m, n ) ) return;
284 if ( bl1_zeq1( alpha ) ) return;
285
286 // Handle cases where A is a vector to ensure that the underlying axpy
287 // gets invoked only once.
288 if ( bl1_is_vector( m, n ) )
289 {
290 // Initialize with values appropriate for a vector.
291 n_iter = 1;
292 n_elem = bl1_vector_dim( m, n );
293 lda = 1; // multiplied by zero when n_iter == 1; not needed.
295 }
296 else // matrix case
297 {
298 // Initialize with optimal values for column-major storage.
299 n_iter = n;
300 n_elem = m;
301 lda = a_cs;
302 inca = a_rs;
303
304 // An optimization: if A is row-major, then let's access the matrix
305 // by rows instead of by columns to increase spatial locality.
306 if ( bl1_is_row_storage( a_rs, a_cs ) )
307 {
310 }
311 }
312
314
315 for ( j = 0; j < n_iter; j++ )
316 {
317 a_begin = a + j*lda;
318
320 &alpha_inv,
321 a_begin, inca );
322 }
323}
void bl1_zinvert2s(conj1_t conj, dcomplex *alpha, dcomplex *beta)
Definition bl1_invert2s.c:44

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zinvert2s(), bl1_zscal(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_zinvscalv()

void bl1_zinvscalv ( conj1_t  conj,
int  n,
dcomplex * alpha,
dcomplex * x,
int  incx 
)

◆ bl1_znrm2()

void bl1_znrm2 ( int  n,
dcomplex * x,
int  incx,
double * norm 
)
47{
48#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
49 *norm = cblas_dznrm2( n,
50 x, incx );
51#else
52 *norm = F77_dznrm2( &n,
53 x, &incx );
54#endif
55}
double F77_dznrm2(int *n, dcomplex *x, int *incx)
double cblas_dznrm2(const int N, const void *X, const int incX)

References cblas_dznrm2(), and F77_dznrm2().

Referenced by FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_l_opz(), FLA_Househ3UD_UT_opz(), and FLA_Nrm2_external().

◆ bl1_zscal()

void bl1_zscal ( int  n,
dcomplex * alpha,
dcomplex * x,
int  incx 
)
79{
80#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
81 cblas_zscal( n,
82 alpha,
83 x, incx );
84#else
85 F77_zscal( &n,
86 alpha,
87 x, &incx );
88#endif
89}
void F77_zscal(int *n, dcomplex *alpha, dcomplex *y, int *incy)
void cblas_zscal(const int N, const void *alpha, void *X, const int incX)

References cblas_zscal(), and F77_zscal().

Referenced by bl1_zaxpysmt(), bl1_zaxpysv(), bl1_zinvscalm(), bl1_zinvscalv(), bl1_zscalm(), bl1_zscalmr(), bl1_zscalv(), and FLA_SA_LU_unb().

◆ bl1_zscalm()

void bl1_zscalm ( conj1_t  conj,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs 
)
274{
277 int lda, inca;
278 int n_iter;
279 int n_elem;
280 int j;
281
282 // Return early if possible.
283 if ( bl1_zero_dim2( m, n ) ) return;
284 if ( bl1_zeq1( alpha ) ) return;
285
286 // Handle cases where A is a vector to ensure that the underlying axpy
287 // gets invoked only once.
288 if ( bl1_is_vector( m, n ) )
289 {
290 // Initialize with values appropriate for a vector.
291 n_iter = 1;
292 n_elem = bl1_vector_dim( m, n );
293 lda = 1; // multiplied by zero when n_iter == 1; not needed.
295 }
296 else // matrix case
297 {
298 // Initialize with optimal values for column-major storage.
299 n_iter = n;
300 n_elem = m;
301 lda = a_cs;
302 inca = a_rs;
303
304 // An optimization: if A is row-major, then let's access the matrix
305 // by rows instead of by columns to increase spatial locality.
306 if ( bl1_is_row_storage( a_rs, a_cs ) )
307 {
310 }
311 }
312
314
315 for ( j = 0; j < n_iter; j++ )
316 {
317 a_begin = a + j*lda;
318
320 &alpha_conj,
321 a_begin, inca );
322 }
323}

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zscal(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_Lyap_n_opz_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_zscalmr()

void bl1_zscalmr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs 
)
294{
296 int lda, inca;
297 int n_iter;
298 int n_elem_max;
299 int n_elem;
300 int j;
301
302 // Return early if possible.
303 if ( bl1_zero_dim2( m, n ) ) return;
304 if ( bl1_zeq1( alpha ) ) return;
305
306 // We initialize for column-major.
307 n_iter = n;
308 n_elem_max = m;
309 lda = a_cs;
310 inca = a_rs;
311
312 // An optimization: if A is row-major, then let's access the matrix
313 // by rows instead of by columns to increase spatial locality.
314 if ( bl1_is_row_storage( a_rs, a_cs ) )
315 {
318 bl1_toggle_uplo( uplo );
319 }
320
321 if ( bl1_is_upper( uplo ) )
322 {
323 for ( j = 0; j < n_iter; j++ )
324 {
325 n_elem = bl1_min( j + 1, n_elem_max );
326 a_begin = a + j*lda;
327
329 alpha,
330 a_begin, inca );
331 }
332 }
333 else // if ( bl1_is_lower( uplo ) )
334 {
335 for ( j = 0; j < n_iter; j++ )
336 {
337 n_elem = bl1_max( 0, n_elem_max - j );
338 a_begin = a + j*lda + j*inca;
339
340 if ( n_elem <= 0 ) break;
341
343 alpha,
344 a_begin, inca );
345 }
346 }
347}

References bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and bl1_zscal().

Referenced by FLA_Scalr_external().

◆ bl1_zscalv()

void bl1_zscalv ( conj1_t  conj,
int  n,
dcomplex * alpha,
dcomplex * x,
int  incx 
)

◆ bl1_zscopymr()

void bl1_zscopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
659{
661 float* b_begin;
662 int lda, inca;
663 int ldb, incb;
664 int n_iter;
665 int n_elem_max;
666 int n_elem;
667 int j;
668
669 // Return early if possible.
670 if ( bl1_zero_dim2( m, n ) ) return;
671
672 // We initialize for column-major.
673 n_iter = n;
674 n_elem_max = m;
675 lda = a_cs;
676 inca = a_rs;
677 ldb = b_cs;
678 incb = b_rs;
679
680 // An optimization: if B is row-major, then let's access the matrix
681 // by rows instead of by columns for increased spatial locality.
682 if ( bl1_is_row_storage( b_rs, b_cs ) )
683 {
687 bl1_toggle_uplo( uplo );
688 }
689
690
691 if ( bl1_is_upper( uplo ) )
692 {
693 for ( j = 0; j < n_iter; j++ )
694 {
695 n_elem = bl1_min( j + 1, n_elem_max );
696 a_begin = a + j*lda;
697 b_begin = b + j*ldb;
698
700 n_elem,
701 a_begin, inca,
702 b_begin, incb );
703 }
704 }
705 else // if ( bl1_is_lower( uplo ) )
706 {
707 for ( j = 0; j < n_iter; j++ )
708 {
709 n_elem = bl1_max( 0, n_elem_max - j );
710 a_begin = a + j*lda + j*inca;
711 b_begin = b + j*ldb + j*incb;
712
713 if ( n_elem <= 0 ) break;
714
716 n_elem,
717 a_begin, inca,
718 b_begin, incb );
719 }
720 }
721}
void bl1_zscopyv(conj1_t conj, int m, dcomplex *x, int incx, float *y, int incy)
Definition bl1_copyv.c:191

References bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), bl1_zscopyv(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_zscopymrt()

void bl1_zscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
1709{
1711 float* b_begin;
1712 int lda, inca;
1713 int ldb, incb;
1714 int n_iter;
1715 int n_elem;
1716 int n_elem_max;
1718 int j;
1719 conj1_t conj;
1720
1721 // Return early if possible.
1722 if ( bl1_zero_dim2( m, n ) ) return;
1723
1724 // Initialize variables based on storage format of B and value of uplo.
1725 if ( bl1_is_col_storage( b_rs, b_cs ) )
1726 {
1727 if ( bl1_is_lower( uplo ) )
1728 {
1729 n_iter = bl1_min( m, n );
1730 n_elem_max = m;
1731 lda = a_cs;
1732 inca = a_rs;
1733 ldb = b_cs;
1734 incb = b_rs;
1736 }
1737 else // if ( bl1_is_upper( uplo ) )
1738 {
1739 n_iter = n;
1740 n_elem_max = bl1_min( m, n );
1741 lda = a_cs;
1742 inca = a_rs;
1743 ldb = b_cs;
1744 incb = b_rs;
1746 }
1747 }
1748 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1749 {
1750 if ( bl1_is_lower( uplo ) )
1751 {
1752 n_iter = m;
1753 n_elem_max = bl1_min( m, n );
1754 lda = a_rs;
1755 inca = a_cs;
1756 ldb = b_rs;
1757 incb = b_cs;
1759 }
1760 else // if ( bl1_is_upper( uplo ) )
1761 {
1762 n_iter = bl1_min( m, n );
1763 n_elem_max = n;
1764 lda = a_rs;
1765 inca = a_cs;
1766 ldb = b_rs;
1767 incb = b_cs;
1769 }
1770 }
1771
1772 // Swap lda and inca if we're doing a transpose.
1773 if ( bl1_does_trans( trans ) )
1774 {
1776 }
1777
1778 // Extract conj component from trans parameter.
1780
1781 // Choose the loop based on whether n_elem will be shrinking or growing
1782 // with each iteration.
1784 {
1785 for ( j = 0; j < n_iter; j++ )
1786 {
1787 n_elem = n_elem_max - j;
1788 a_begin = a + j*lda + j*inca;
1789 b_begin = b + j*ldb + j*incb;
1790
1792 n_elem,
1793 a_begin, inca,
1794 b_begin, incb );
1795 }
1796 }
1797 else // if ( n_elem_is_ascending )
1798 {
1799 for ( j = 0; j < n_iter; j++ )
1800 {
1801 n_elem = bl1_min( j + 1, n_elem_max );
1802 a_begin = a + j*lda;
1803 b_begin = b + j*ldb;
1804
1806 n_elem,
1807 a_begin, inca,
1808 b_begin, incb );
1809 }
1810 }
1811}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zero_dim2(), and bl1_zscopyv().

Referenced by FLA_Copyrt_external().

◆ bl1_zscopymt()

void bl1_zscopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
763{
765 float* b_begin;
766 int lda, inca;
767 int ldb, incb;
768 int n_iter;
769 int n_elem;
770 int j;
772
773 // Return early if possible.
774 if ( bl1_zero_dim2( m, n ) ) return;
775
776 // Handle cases where A and B are vectors to ensure that the underlying copy
777 // gets invoked only once.
778 if ( bl1_is_vector( m, n ) )
779 {
780 // Initialize with values appropriate for vectors.
781 n_iter = 1;
782 n_elem = bl1_vector_dim( m, n );
783 lda = 1; // multiplied by zero when n_iter == 1; not needed.
784 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
785 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
787 }
788 else // matrix case
789 {
790 // Initialize with optimal values for column-major storage of B.
791 n_iter = n;
792 n_elem = m;
793 lda = a_cs;
794 inca = a_rs;
795 ldb = b_cs;
796 incb = b_rs;
797
798 // Handle the transposition of A.
799 if ( bl1_does_trans( trans ) )
800 {
802 }
803
804 // An optimization: if B is row-major, then let's access the matrix by rows
805 // instead of by columns for increased spatial locality.
806 if ( bl1_is_row_storage( b_rs, b_cs ) )
807 {
811 }
812 }
813
814 // Extract conj component from trans parameter.
816
817 for ( j = 0; j < n_iter; ++j )
818 {
819 a_begin = a + j*lda;
820 b_begin = b + j*ldb;
821
823 n_elem,
824 a_begin, inca,
825 b_begin, incb );
826 }
827}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zscopyv(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_zscopyv()

void bl1_zscopyv ( conj1_t  conj,
int  m,
dcomplex * x,
int  incx,
float * y,
int  incy 
)
192{
193 dcomplex* chi;
194 float* psi;
195 int i;
196
197 // Return early if possible.
198 if ( bl1_zero_dim1( m ) ) return;
199
200 // Initialize pointers.
201 chi = x;
202 psi = y;
203
204 for ( i = 0; i < m; ++i )
205 {
206 *psi = chi->real;
207
208 chi += incx;
209 psi += incy;
210 }
211}

References bl1_zero_dim1(), i, and dcomplex::real.

Referenced by bl1_zscopymr(), bl1_zscopymrt(), and bl1_zscopymt().

◆ bl1_zswap()

void bl1_zswap ( int  n,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy 
)
53{
54#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
55 cblas_zswap( n,
56 x, incx,
57 y, incy );
58#else
59 F77_zswap( &n,
60 x, &incx,
61 y, &incy );
62#endif
63}
void F77_zswap(int *n, dcomplex *x, int *incx, dcomplex *y, int *incy)
void cblas_zswap(const int N, void *X, const int incX, void *Y, const int incY)

References cblas_zswap(), and F77_zswap().

Referenced by bl1_zswapmt(), bl1_zswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_zswapmt()

void bl1_zswapmt ( trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
223{
226 int lda, inca;
227 int ldb, incb;
228 int n_iter;
229 int n_elem;
230 int j;
231
232 // Return early if possible.
233 if ( bl1_zero_dim2( m, n ) ) return;
234
235 // Handle cases where A and B are vectors to ensure that the underlying copy
236 // gets invoked only once.
237 if ( bl1_is_vector( m, n ) )
238 {
239 // Initialize with values appropriate for vectors.
240 n_iter = 1;
241 n_elem = bl1_vector_dim( m, n );
242 lda = 1; // multiplied by zero when n_iter == 1; not needed.
243 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
244 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
246 }
247 else // matrix case
248 {
249 // Initialize with optimal values for column-major storage.
250 n_iter = n;
251 n_elem = m;
252 lda = a_cs;
253 inca = a_rs;
254 ldb = b_cs;
255 incb = b_rs;
256
257 // Handle the transposition of A.
258 if ( bl1_does_trans( trans ) )
259 {
261 }
262
263 // An optimization: if B is row-major and if A is effectively row-major
264 // after a possible transposition, then let's access the matrix by rows
265 // instead of by columns for increased spatial locality.
266 if ( bl1_is_row_storage( b_rs, b_cs ) )
267 {
268 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
270 {
274 }
275 }
276 }
277
278 for ( j = 0; j < n_iter; j++ )
279 {
280 a_begin = a + j*lda;
281 b_begin = b + j*ldb;
282
284 a_begin, inca,
285 b_begin, incb );
286
287 if ( bl1_does_conj( trans ) )
289 a_begin, inca );
290
291 if ( bl1_does_conj( trans ) )
293 b_begin, incb );
294 }
295}
void bl1_zswap(int n, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_swap.c:52

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zero_dim2(), bl1_zswap(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_zswapv()

void bl1_zswapv ( int  n,
dcomplex * x,
int  incx,
dcomplex * y,
int  incy 
)

◆ bl1_zzcopymr()

void bl1_zzcopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
1247{
1250 int lda, inca;
1251 int ldb, incb;
1252 int n_iter;
1253 int n_elem_max;
1254 int n_elem;
1255 int j;
1256
1257 // Return early if possible.
1258 if ( bl1_zero_dim2( m, n ) ) return;
1259
1260 // We initialize for column-major.
1261 n_iter = n;
1262 n_elem_max = m;
1263 lda = a_cs;
1264 inca = a_rs;
1265 ldb = b_cs;
1266 incb = b_rs;
1267
1268 // An optimization: if B is row-major, then let's access the matrix
1269 // by rows instead of by columns for increased spatial locality.
1270 if ( bl1_is_row_storage( b_rs, b_cs ) )
1271 {
1275 bl1_toggle_uplo( uplo );
1276 }
1277
1278
1279 if ( bl1_is_upper( uplo ) )
1280 {
1281 for ( j = 0; j < n_iter; j++ )
1282 {
1283 n_elem = bl1_min( j + 1, n_elem_max );
1284 a_begin = a + j*lda;
1285 b_begin = b + j*ldb;
1286
1288 n_elem,
1289 a_begin, inca,
1290 b_begin, incb );
1291 }
1292 }
1293 else // if ( bl1_is_lower( uplo ) )
1294 {
1295 for ( j = 0; j < n_iter; j++ )
1296 {
1297 n_elem = bl1_max( 0, n_elem_max - j );
1298 a_begin = a + j*lda + j*inca;
1299 b_begin = b + j*ldb + j*incb;
1300
1301 if ( n_elem <= 0 ) break;
1302
1304 n_elem,
1305 a_begin, inca,
1306 b_begin, incb );
1307 }
1308 }
1309}

References bl1_is_row_storage(), bl1_is_upper(), bl1_zcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_zzcopymrt()

void bl1_zzcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
2027{
2030 int lda, inca;
2031 int ldb, incb;
2032 int n_iter;
2033 int n_elem;
2034 int n_elem_max;
2036 int j;
2037 conj1_t conj;
2038
2039 // Return early if possible.
2040 if ( bl1_zero_dim2( m, n ) ) return;
2041
2042 // Initialize variables based on storage format of B and value of uplo.
2043 if ( bl1_is_col_storage( b_rs, b_cs ) )
2044 {
2045 if ( bl1_is_lower( uplo ) )
2046 {
2047 n_iter = bl1_min( m, n );
2048 n_elem_max = m;
2049 lda = a_cs;
2050 inca = a_rs;
2051 ldb = b_cs;
2052 incb = b_rs;
2054 }
2055 else // if ( bl1_is_upper( uplo ) )
2056 {
2057 n_iter = n;
2058 n_elem_max = bl1_min( m, n );
2059 lda = a_cs;
2060 inca = a_rs;
2061 ldb = b_cs;
2062 incb = b_rs;
2064 }
2065 }
2066 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
2067 {
2068 if ( bl1_is_lower( uplo ) )
2069 {
2070 n_iter = m;
2071 n_elem_max = bl1_min( m, n );
2072 lda = a_rs;
2073 inca = a_cs;
2074 ldb = b_rs;
2075 incb = b_cs;
2077 }
2078 else // if ( bl1_is_upper( uplo ) )
2079 {
2080 n_iter = bl1_min( m, n );
2081 n_elem_max = n;
2082 lda = a_rs;
2083 inca = a_cs;
2084 ldb = b_rs;
2085 incb = b_cs;
2087 }
2088 }
2089
2090 // Swap lda and inca if we're doing a transpose.
2091 if ( bl1_does_trans( trans ) )
2092 {
2094 }
2095
2096 // Extract conj component from trans parameter.
2098
2099 // Choose the loop based on whether n_elem will be shrinking or growing
2100 // with each iteration.
2102 {
2103 for ( j = 0; j < n_iter; j++ )
2104 {
2105 n_elem = n_elem_max - j;
2106 a_begin = a + j*lda + j*inca;
2107 b_begin = b + j*ldb + j*incb;
2108
2110 n_elem,
2111 a_begin, inca,
2112 b_begin, incb );
2113 }
2114 }
2115 else // if ( n_elem_is_ascending )
2116 {
2117 for ( j = 0; j < n_iter; j++ )
2118 {
2119 n_elem = bl1_min( j + 1, n_elem_max );
2120 a_begin = a + j*lda;
2121 b_begin = b + j*ldb;
2122
2124 n_elem,
2125 a_begin, inca,
2126 b_begin, incb );
2127 }
2128 }
2129}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zcopyv(), and bl1_zero_dim2().

◆ bl1_zzcopymt()

void bl1_zzcopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs 
)
1369{
1372 int lda, inca;
1373 int ldb, incb;
1374 int n_iter;
1375 int n_elem;
1376 int j;
1377 conj1_t conj;
1378
1379 // Return early if possible.
1380 if ( bl1_zero_dim2( m, n ) ) return;
1381
1382 // Handle cases where A and B are vectors to ensure that the underlying copy
1383 // gets invoked only once.
1384 if ( bl1_is_vector( m, n ) )
1385 {
1386 // Initialize with values appropriate for vectors.
1387 n_iter = 1;
1388 n_elem = bl1_vector_dim( m, n );
1389 lda = 1; // multiplied by zero when n_iter == 1; not needed.
1390 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1391 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1393 }
1394 else // matrix case
1395 {
1396 // Initialize with optimal values for column-major storage of B.
1397 n_iter = n;
1398 n_elem = m;
1399 lda = a_cs;
1400 inca = a_rs;
1401 ldb = b_cs;
1402 incb = b_rs;
1403
1404 // Handle the transposition of A.
1405 if ( bl1_does_trans( trans ) )
1406 {
1408 }
1409
1410 // An optimization: if B is row-major, then let's access the matrix by rows
1411 // instead of by columns for increased spatial locality.
1412 if ( bl1_is_row_storage( b_rs, b_cs ) )
1413 {
1417 }
1418 }
1419
1420 // Extract conj component from trans parameter.
1422
1423 for ( j = 0; j < n_iter; ++j )
1424 {
1425 a_begin = a + j*lda;
1426 b_begin = b + j*ldb;
1427
1429 n_elem,
1430 a_begin, inca,
1431 b_begin, incb );
1432 }
1433}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.