libflame revision_anchor
Functions
FLA_Apply_G_rf.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Apply_G_rf_opt_var1 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var1 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var1 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var2 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var2 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var2 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var3 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var3 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var3 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var4 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var4 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var4 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var5 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var5 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var5 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var6 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var7 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var7 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var7 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var8 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var8 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var8 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var9 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var9 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var9 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var3b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var3b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var5b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var5b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var6b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var6b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var8b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var8b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhs_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var9b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var9b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var1()

FLA_Error FLA_Apply_G_rf_asc_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
scomplex *buff_A,
int  rs_A,
int  cs_A 
)
220{
221 float one = bl1_s1();
222 float zero = bl1_s0();
223 int nG_app = n_A - 1;
224 int l, j;
225 float gamma;
226 float sigma;
227 scomplex* a1;
228 scomplex* a2;
229 scomplex* g1;
230 scomplex* g11;
231
232 g1 = buff_G;
233
234 for ( l = 0; l < k_G; ++l )
235 {
236 a1 = buff_A;
237 a2 = buff_A + cs_A;
238 g11 = g1;
239
240 for ( j = 0; j < nG_app; ++j )
241 {
242 gamma = g11->real;
243 sigma = g11->imag;
244
245 // Skip the current iteration if the rotation is identity.
246 if ( gamma != one || sigma != zero )
247 {
249 &gamma,
250 &sigma,
251 a1, 1,
252 a2, 1 );
253 }
254
255 a1 += cs_A;
256 a2 += cs_A;
257 g11 += rs_G;
258 }
259
260 g1 += cs_G;
261 }
262
263 return FLA_SUCCESS;
264}
int i
Definition bl1_axmyv2.c:145
float bl1_s0(void)
Definition bl1_constants.c:111
float bl1_s1(void)
Definition bl1_constants.c:47
Definition blis_type_defs.h:133
float real
Definition blis_type_defs.h:134

References bl1_s0(), bl1_s1(), i, and scomplex::real.

Referenced by FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_blc_var1().

◆ FLA_Apply_G_rf_asc_var2()

FLA_Error FLA_Apply_G_rf_asc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
scomplex *buff_A,
int  rs_A,
int  cs_A 
)
344{
345 float one = bl1_s1();
346 float zero = bl1_s0();
347 float gamma;
348 float sigma;
349 scomplex* a1;
350 scomplex* a2;
351 scomplex* g11;
352 int j, g, k;
353 int nG, nG_app;
354 int k_minus_1;
355
356 k_minus_1 = k_G - 1;
357 nG = n_A - 1;
358
359 // Use the simple variant for nG < (k - 1) or k == 1.
360 if ( nG < k_minus_1 || k_G == 1 )
361 {
362 FLA_Apply_G_rf_asc_var1( k_G,
363 m_A,
364 n_A,
365 buff_G, rs_G, cs_G,
366 buff_A, rs_A, cs_A );
367 return FLA_SUCCESS;
368 }
369
370
371 // Start-up phase.
372
373 for ( j = 0; j < k_minus_1; ++j )
374 {
375 nG_app = j + 1;
376
377 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
378 {
379 g11 = buff_G + (g )*rs_G + (k )*cs_G;
380 a1 = buff_A + (g )*cs_A;
381 a2 = buff_A + (g + 1)*cs_A;
382
383 gamma = g11->real;
384 sigma = g11->imag;
385
386 // Skip the current iteration if the rotation is identity.
387 if ( gamma == one && sigma == zero ) continue;
388
390 &gamma,
391 &sigma,
392 a1, 1,
393 a2, 1 );
394 }
395 }
396
397 // Pipeline stage
398
399 for ( j = k_minus_1; j < nG; ++j )
400 {
401 nG_app = k_G;
402
403 for ( k = 0, g = j; k < nG_app; ++k, --g )
404 {
405 g11 = buff_G + (g )*rs_G + (k )*cs_G;
406 a1 = buff_A + (g )*cs_A;
407 a2 = buff_A + (g + 1)*cs_A;
408
409 gamma = g11->real;
410 sigma = g11->imag;
411
412 // Skip the current iteration if the rotation is identity.
413 if ( gamma == one && sigma == zero ) continue;
414
416 &gamma,
417 &sigma,
418 a1, 1,
419 a2, 1 );
420 }
421 }
422
423 // Shutdown stage
424
425 for ( j = nG - k_minus_1; j < nG; ++j )
426 {
427 nG_app = nG - j;
428
429 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
430 {
431 g11 = buff_G + (g )*rs_G + (k )*cs_G;
432 a1 = buff_A + (g )*cs_A;
433 a2 = buff_A + (g + 1)*cs_A;
434
435 gamma = g11->real;
436 sigma = g11->imag;
437
438 // Skip the current iteration if the rotation is identity.
439 if ( gamma == one && sigma == zero ) continue;
440
442 &gamma,
443 &sigma,
444 a1, 1,
445 a2, 1 );
446 }
447 }
448
449 return FLA_SUCCESS;
450}
FLA_Error FLA_Apply_G_rf_asc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:215

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), and i.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blc_var2().

◆ FLA_Apply_G_rf_asc_var3()

FLA_Error FLA_Apply_G_rf_asc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
scomplex *buff_A,
int  rs_A,
int  cs_A 
)
1024{
1025 float one = bl1_s1();
1026 float zero = bl1_s0();
1027 float gamma23_k1;
1028 float sigma23_k1;
1029 float gamma34_k1;
1030 float sigma34_k1;
1031 float gamma12_k2;
1032 float sigma12_k2;
1033 float gamma23_k2;
1034 float sigma23_k2;
1035 scomplex* a1;
1036 scomplex* a2;
1037 scomplex* a3;
1038 scomplex* a4;
1043 int i, j, g, k;
1044 int nG, nG_app;
1045 int n_iter;
1046 int n_left;
1047 int k_minus_1;
1048 int n_fuse;
1049 int k_fuse;
1052 int has_ident;
1053
1054 k_minus_1 = k_G - 1;
1055 nG = n_A - 1;
1056 n_fuse = 2;
1057 k_fuse = 2;
1058
1059 // Use the simple variant for nG < 2*(k - 1) or k == 1.
1060 if ( nG < 2*k_minus_1 || k_G == 1 )
1061 {
1062 FLA_Apply_G_rf_asc_var1( k_G,
1063 m_A,
1064 n_A,
1065 buff_G, rs_G, cs_G,
1066 buff_A, rs_A, cs_A );
1067 return FLA_SUCCESS;
1068 }
1069
1070
1071 // Start-up phase.
1072
1073 for ( j = -1; j < k_minus_1; j += n_fuse )
1074 {
1075 nG_app = j + 2;
1076 n_iter = nG_app / k_fuse;
1077 //n_left = nG_app % k_fuse;
1078 n_left = 1;
1079
1080 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1081 {
1082 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1083 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1084 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1085 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1086 a1 = buff_A + (g - 1)*cs_A;
1087 a2 = buff_A + (g )*cs_A;
1088 a3 = buff_A + (g + 1)*cs_A;
1089 a4 = buff_A + (g + 2)*cs_A;
1090
1091 gamma23_k1 = g23_k1->real;
1092 sigma23_k1 = g23_k1->imag;
1093 gamma34_k1 = g34_k1->real;
1094 sigma34_k1 = g34_k1->imag;
1095 gamma12_k2 = g12_k2->real;
1096 sigma12_k2 = g12_k2->imag;
1097 gamma23_k2 = g23_k2->real;
1098 sigma23_k2 = g23_k2->imag;
1099
1106
1107 if ( has_ident )
1108 {
1109 // Apply to pairs of columns as needed.
1110
1111 if ( !is_ident23_k1 )
1113 &gamma23_k1,
1114 &sigma23_k1,
1115 a2, 1,
1116 a3, 1 );
1117
1118 if ( !is_ident34_k1 )
1120 &gamma34_k1,
1121 &sigma34_k1,
1122 a3, 1,
1123 a4, 1 );
1124
1125 if ( !is_ident12_k2 )
1127 &gamma12_k2,
1128 &sigma12_k2,
1129 a1, 1,
1130 a2, 1 );
1131
1132 if ( !is_ident23_k2 )
1134 &gamma23_k2,
1135 &sigma23_k2,
1136 a2, 1,
1137 a3, 1 );
1138 }
1139 else
1140 {
1141 // Apply to all four columns.
1142
1144 &gamma23_k1,
1145 &sigma23_k1,
1146 &gamma34_k1,
1147 &sigma34_k1,
1148 &gamma12_k2,
1149 &sigma12_k2,
1150 &gamma23_k2,
1151 &sigma23_k2,
1152 a1, 1,
1153 a2, 1,
1154 a3, 1,
1155 a4, 1 );
1156 }
1157 }
1158
1159 if ( n_left == 1 )
1160 {
1161 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1162 a3 = buff_A + (g + 1)*cs_A;
1163 a4 = buff_A + (g + 2)*cs_A;
1164
1165 gamma34_k1 = g34_k1->real;
1166 sigma34_k1 = g34_k1->imag;
1167
1169
1170 if ( !is_ident34_k1 )
1172 &gamma34_k1,
1173 &sigma34_k1,
1174 a3, 1,
1175 a4, 1 );
1176 }
1177 }
1178
1179 // Pipeline stage
1180
1181 for ( ; j < nG - 1; j += n_fuse )
1182 {
1183 nG_app = k_G;
1184 n_iter = nG_app / k_fuse;
1185 n_left = nG_app % k_fuse;
1186
1187 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1188 {
1189 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1190 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1191 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1192 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1193 a1 = buff_A + (g - 1)*cs_A;
1194 a2 = buff_A + (g )*cs_A;
1195 a3 = buff_A + (g + 1)*cs_A;
1196 a4 = buff_A + (g + 2)*cs_A;
1197
1198 gamma23_k1 = g23_k1->real;
1199 sigma23_k1 = g23_k1->imag;
1200 gamma34_k1 = g34_k1->real;
1201 sigma34_k1 = g34_k1->imag;
1202 gamma12_k2 = g12_k2->real;
1203 sigma12_k2 = g12_k2->imag;
1204 gamma23_k2 = g23_k2->real;
1205 sigma23_k2 = g23_k2->imag;
1206
1213
1214 if ( has_ident )
1215 {
1216 // Apply to pairs of columns as needed.
1217
1218 if ( !is_ident23_k1 )
1220 &gamma23_k1,
1221 &sigma23_k1,
1222 a2, 1,
1223 a3, 1 );
1224
1225 if ( !is_ident34_k1 )
1227 &gamma34_k1,
1228 &sigma34_k1,
1229 a3, 1,
1230 a4, 1 );
1231
1232 if ( !is_ident12_k2 )
1234 &gamma12_k2,
1235 &sigma12_k2,
1236 a1, 1,
1237 a2, 1 );
1238
1239 if ( !is_ident23_k2 )
1241 &gamma23_k2,
1242 &sigma23_k2,
1243 a2, 1,
1244 a3, 1 );
1245 }
1246 else
1247 {
1248 // Apply to all four columns.
1249
1251 &gamma23_k1,
1252 &sigma23_k1,
1253 &gamma34_k1,
1254 &sigma34_k1,
1255 &gamma12_k2,
1256 &sigma12_k2,
1257 &gamma23_k2,
1258 &sigma23_k2,
1259 a1, 1,
1260 a2, 1,
1261 a3, 1,
1262 a4, 1 );
1263 }
1264 }
1265
1266 if ( n_left == 1 )
1267 {
1268 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1269 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1270 a2 = buff_A + (g )*cs_A;
1271 a3 = buff_A + (g + 1)*cs_A;
1272 a4 = buff_A + (g + 2)*cs_A;
1273
1274 gamma23_k1 = g23_k1->real;
1275 sigma23_k1 = g23_k1->imag;
1276 gamma34_k1 = g34_k1->real;
1277 sigma34_k1 = g34_k1->imag;
1278
1281
1282 if ( !is_ident23_k1 && is_ident34_k1 )
1283 {
1285 &gamma23_k1,
1286 &sigma23_k1,
1287 a2, 1,
1288 a3, 1 );
1289 }
1290 else if ( is_ident23_k1 && !is_ident34_k1 )
1291 {
1293 &gamma34_k1,
1294 &sigma34_k1,
1295 a3, 1,
1296 a4, 1 );
1297 }
1298 else
1299 {
1301 &gamma23_k1,
1302 &sigma23_k1,
1303 &gamma34_k1,
1304 &sigma34_k1,
1305 a2, 1,
1306 a3, 1,
1307 a4, 1 );
1308 }
1309 }
1310 }
1311
1312 // Shutdown stage
1313
1314 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1315 {
1316 g = nG - 1;
1317 k = j;
1318
1319 //n_left = 1;
1320 //if ( n_left == 1 )
1321 {
1322 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1323 a2 = buff_A + (g )*cs_A;
1324 a3 = buff_A + (g + 1)*cs_A;
1325
1326 gamma23_k1 = g23_k1->real;
1327 sigma23_k1 = g23_k1->imag;
1328
1330
1331 if ( !is_ident23_k1 )
1333 &gamma23_k1,
1334 &sigma23_k1,
1335 a2, 1,
1336 a3, 1 );
1337 ++k;
1338 --g;
1339 }
1340
1341 nG_app = k_minus_1 - j;
1342 n_iter = nG_app / k_fuse;
1343 n_left = nG_app % k_fuse;
1344
1345 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1346 {
1347 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1348 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1349 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1350 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1351 a1 = buff_A + (g - 1)*cs_A;
1352 a2 = buff_A + (g )*cs_A;
1353 a3 = buff_A + (g + 1)*cs_A;
1354 a4 = buff_A + (g + 2)*cs_A;
1355
1356 gamma23_k1 = g23_k1->real;
1357 sigma23_k1 = g23_k1->imag;
1358 gamma34_k1 = g34_k1->real;
1359 sigma34_k1 = g34_k1->imag;
1360 gamma12_k2 = g12_k2->real;
1361 sigma12_k2 = g12_k2->imag;
1362 gamma23_k2 = g23_k2->real;
1363 sigma23_k2 = g23_k2->imag;
1364
1371
1372 if ( has_ident )
1373 {
1374 // Apply to pairs of columns as needed.
1375
1376 if ( !is_ident23_k1 )
1378 &gamma23_k1,
1379 &sigma23_k1,
1380 a2, 1,
1381 a3, 1 );
1382
1383 if ( !is_ident34_k1 )
1385 &gamma34_k1,
1386 &sigma34_k1,
1387 a3, 1,
1388 a4, 1 );
1389
1390 if ( !is_ident12_k2 )
1392 &gamma12_k2,
1393 &sigma12_k2,
1394 a1, 1,
1395 a2, 1 );
1396
1397 if ( !is_ident23_k2 )
1399 &gamma23_k2,
1400 &sigma23_k2,
1401 a2, 1,
1402 a3, 1 );
1403 }
1404 else
1405 {
1406 // Apply to all four columns.
1407
1409 &gamma23_k1,
1410 &sigma23_k1,
1411 &gamma34_k1,
1412 &sigma34_k1,
1413 &gamma12_k2,
1414 &sigma12_k2,
1415 &gamma23_k2,
1416 &sigma23_k2,
1417 a1, 1,
1418 a2, 1,
1419 a3, 1,
1420 a4, 1 );
1421 }
1422 }
1423
1424 if ( n_left == 1 )
1425 {
1426 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1427 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1428 a2 = buff_A + (g )*cs_A;
1429 a3 = buff_A + (g + 1)*cs_A;
1430 a4 = buff_A + (g + 2)*cs_A;
1431
1432 gamma23_k1 = g23_k1->real;
1433 sigma23_k1 = g23_k1->imag;
1434 gamma34_k1 = g34_k1->real;
1435 sigma34_k1 = g34_k1->imag;
1436
1439
1440 if ( !is_ident23_k1 && is_ident34_k1 )
1441 {
1443 &gamma23_k1,
1444 &sigma23_k1,
1445 a2, 1,
1446 a3, 1 );
1447 }
1448 else if ( is_ident23_k1 && !is_ident34_k1 )
1449 {
1451 &gamma34_k1,
1452 &sigma34_k1,
1453 a3, 1,
1454 a4, 1 );
1455 }
1456 else
1457 {
1459 &gamma23_k1,
1460 &sigma23_k1,
1461 &gamma34_k1,
1462 &sigma34_k1,
1463 a2, 1,
1464 a3, 1,
1465 a4, 1 );
1466 }
1467 }
1468 }
1469
1470 return FLA_SUCCESS;
1471}
int n_left
Definition bl1_axmyv2.c:149

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_asc_var3b()

FLA_Error FLA_Apply_G_rf_asc_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
614{
// The visible body does nothing except report FLA_SUCCESS to the caller.
// NOTE(review): listing line 615 is absent from this view; confirm what it
// does against the source file before relying on this description.
616
617 return FLA_SUCCESS;
618}

References i.

Referenced by FLA_Apply_G_rf_asm_var3b().

◆ FLA_Apply_G_rf_asc_var4()

FLA_Error FLA_Apply_G_rf_asc_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var5()

FLA_Error FLA_Apply_G_rf_asc_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var5b()

FLA_Error FLA_Apply_G_rf_asc_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var6()

FLA_Error FLA_Apply_G_rf_asc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
680{
// Applies the rotations stored in G to adjacent columns of A, visiting G
// along anti-diagonals (k increases while g decreases) and handling two
// rotations per step (n_fuse = 2) on three neighbouring columns a1, a2, a3.
// Each G entry supplies gamma in its .real field and sigma in its .imag field.
// Column c of A is addressed as buff_A + c*cs_A; entry (g, k) of G is
// addressed as buff_G + g*rs_G + k*cs_G.
// NOTE(review): the rotation-kernel invocation lines and the declarations of
// is_ident12 / is_ident23 are elided from this listing; confirm them against
// the source file.
681 float one = bl1_s1();
682 float zero = bl1_s0();
683 float gamma12;
684 float sigma12;
685 float gamma23;
686 float sigma23;
687 scomplex* a1;
688 scomplex* a2;
689 scomplex* a3;
690 scomplex* g12;
691 scomplex* g23;
692 int i, j, g, k;
693 int nG, nG_app;
694 int n_iter;
695 int n_left;
696 int k_minus_1;
697 int n_fuse;
699
700 k_minus_1 = k_G - 1;
701 nG = n_A - 1;
702 n_fuse = 2;
703
704 // Use the simple variant for nG < (k - 1) or k == 1.
// NOTE(review): the callee name (listing line 707) is not shown; presumably
// FLA_Apply_G_rf_asc_var1, which this function's References entry lists.
705 if ( nG < k_minus_1 || k_G == 1 )
706 {
708 m_A,
709 n_A,
710 buff_G, rs_G, cs_G,
711 buff_A, rs_A, cs_A );
712 return FLA_SUCCESS;
713 }
714
715
716 // Start-up phase.
// Step j handles j + 1 rotations: (g, k) runs from (j, 0) towards (0, j).
717
718 for ( j = 0; j < k_minus_1; ++j )
719 {
720 nG_app = j + 1;
721 n_iter = nG_app / n_fuse;
722 n_left = nG_app % n_fuse;
723
724 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
725 {
// Pair up G(g, k), which acts on columns g and g+1, with G(g-1, k+1),
// which acts on columns g-1 and g.
726 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
727 g23 = buff_G + (g )*rs_G + (k )*cs_G;
728 a1 = buff_A + (g - 1)*cs_A;
729 a2 = buff_A + (g )*cs_A;
730 a3 = buff_A + (g + 1)*cs_A;
731
732 gamma12 = g12->real;
733 sigma12 = g12->imag;
734 gamma23 = g23->real;
735 sigma23 = g23->imag;
736
// A rotation with gamma == 1 and sigma == 0 is treated as identity.
737 is_ident12 = ( gamma12 == one && sigma12 == zero );
738 is_ident23 = ( gamma23 == one && sigma23 == zero );
739
// No branch exists for the case where both rotations are identity, so
// nothing is applied then.
740 if ( !is_ident12 && is_ident23 )
741 {
742 // Apply only to columns 1 and 2.
743
745 &gamma12,
746 &sigma12,
747 a1, 1,
748 a2, 1 );
749 }
750 else if ( is_ident12 && !is_ident23 )
751 {
752 // Apply only to columns 2 and 3.
753
755 &gamma23,
756 &sigma23,
757 a2, 1,
758 a3, 1 );
759 }
760 else if ( !is_ident12 && !is_ident23 )
761 {
762 // Apply to all three columns.
763
765 &gamma12,
766 &sigma12,
767 &gamma23,
768 &sigma23,
769 a1, 1,
770 a2, 1,
771 a3, 1 );
772 }
773 }
774
// Odd count: one rotation is left over at the (g, k) the loop stopped on.
775 if ( n_left == 1 )
776 {
777 g23 = buff_G + (g )*rs_G + (k )*cs_G;
778 a2 = buff_A + (g )*cs_A;
779 a3 = buff_A + (g + 1)*cs_A;
780
781 gamma23 = g23->real;
782 sigma23 = g23->imag;
783
784 is_ident23 = ( gamma23 == one && sigma23 == zero );
785
786 if ( !is_ident23 )
788 &gamma23,
789 &sigma23,
790 a2, 1,
791 a3, 1 );
792 }
793 }
794
795 // Pipeline stage
// Every step handles a full set of k_G rotations, starting at (g, k) = (j, 0).
796
797 for ( j = k_minus_1; j < nG; ++j )
798 {
799 nG_app = k_G;
800 n_iter = nG_app / n_fuse;
801 n_left = nG_app % n_fuse;
802
803 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
804 {
805 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
806 g23 = buff_G + (g )*rs_G + (k )*cs_G;
807 a1 = buff_A + (g - 1)*cs_A;
808 a2 = buff_A + (g )*cs_A;
809 a3 = buff_A + (g + 1)*cs_A;
810
811 gamma12 = g12->real;
812 sigma12 = g12->imag;
813 gamma23 = g23->real;
814 sigma23 = g23->imag;
815
816 is_ident12 = ( gamma12 == one && sigma12 == zero );
817 is_ident23 = ( gamma23 == one && sigma23 == zero );
818
819 if ( !is_ident12 && is_ident23 )
820 {
821 // Apply only to columns 1 and 2.
822
824 &gamma12,
825 &sigma12,
826 a1, 1,
827 a2, 1 );
828 }
829 else if ( is_ident12 && !is_ident23 )
830 {
831 // Apply only to columns 2 and 3.
832
834 &gamma23,
835 &sigma23,
836 a2, 1,
837 a3, 1 );
838 }
839 else if ( !is_ident12 && !is_ident23 )
840 {
841 // Apply to all three columns.
842
844 &gamma12,
845 &sigma12,
846 &gamma23,
847 &sigma23,
848 a1, 1,
849 a2, 1,
850 a3, 1 );
851 }
852 }
853
// Leftover single rotation when k_G is odd.
854 if ( n_left == 1 )
855 {
856 g23 = buff_G + (g )*rs_G + (k )*cs_G;
857 a2 = buff_A + (g )*cs_A;
858 a3 = buff_A + (g + 1)*cs_A;
859
860 gamma23 = g23->real;
861 sigma23 = g23->imag;
862
863 is_ident23 = ( gamma23 == one && sigma23 == zero );
864
865 if ( !is_ident23 )
867 &gamma23,
868 &sigma23,
869 a2, 1,
870 a3, 1 );
871 }
872 }
873
874 // Shutdown stage
// Step j handles the remaining k_G - j rotations, starting at
// (g, k) = (nG - 1, j).
875
876 for ( j = 1; j < k_G; ++j )
877 {
878 nG_app = k_G - j;
879 n_iter = nG_app / n_fuse;
880 n_left = nG_app % n_fuse;
881
882 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
883 {
884 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
885 g23 = buff_G + (g )*rs_G + (k )*cs_G;
886 a1 = buff_A + (g - 1)*cs_A;
887 a2 = buff_A + (g )*cs_A;
888 a3 = buff_A + (g + 1)*cs_A;
889
890 gamma12 = g12->real;
891 sigma12 = g12->imag;
892 gamma23 = g23->real;
893 sigma23 = g23->imag;
894
895 is_ident12 = ( gamma12 == one && sigma12 == zero );
896 is_ident23 = ( gamma23 == one && sigma23 == zero );
897
898 if ( !is_ident12 && is_ident23 )
899 {
900 // Apply only to columns 1 and 2.
901
903 &gamma12,
904 &sigma12,
905 a1, 1,
906 a2, 1 );
907 }
908 else if ( is_ident12 && !is_ident23 )
909 {
910 // Apply only to columns 2 and 3.
911
913 &gamma23,
914 &sigma23,
915 a2, 1,
916 a3, 1 );
917 }
918 else if ( !is_ident12 && !is_ident23 )
919 {
920 // Apply to all three columns.
921
923 &gamma12,
924 &sigma12,
925 &gamma23,
926 &sigma23,
927 a1, 1,
928 a2, 1,
929 a3, 1 );
930 }
931 }
932
// Leftover single rotation when k_G - j is odd.
933 if ( n_left == 1 )
934 {
935 g23 = buff_G + (g )*rs_G + (k )*cs_G;
936 a2 = buff_A + (g )*cs_A;
937 a3 = buff_A + (g + 1)*cs_A;
938
939 gamma23 = g23->real;
940 sigma23 = g23->imag;
941
942 is_ident23 = ( gamma23 == one && sigma23 == zero );
943
944 if ( !is_ident23 )
946 &gamma23,
947 &sigma23,
948 a2, 1,
949 a3, 1 );
950 }
951 }
952
953 return FLA_SUCCESS;
954}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blc_var6().

◆ FLA_Apply_G_rf_asc_var6b()

FLA_Error FLA_Apply_G_rf_asc_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
437{
// The visible body does nothing except report FLA_SUCCESS to the caller.
// NOTE(review): listing line 438 is absent from this view; confirm what it
// does against the source file before relying on this description.
439
440 return FLA_SUCCESS;
441}

References i.

Referenced by FLA_Apply_G_rf_asm_var6b().

◆ FLA_Apply_G_rf_asc_var7()

FLA_Error FLA_Apply_G_rf_asc_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var8()

FLA_Error FLA_Apply_G_rf_asc_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var8b()

FLA_Error FLA_Apply_G_rf_asc_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var9()

FLA_Error FLA_Apply_G_rf_asc_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
652{
// Applies the rotations stored in G to adjacent columns of A. Each fused step
// takes two rotations from the SAME column k of G, at rows g and g+1, and
// applies them to three neighbouring columns a1, a2, a3 of A; the outer loops
// advance j by n_fuse = 2. Each G entry supplies gamma in its .real field and
// sigma in its .imag field.
// NOTE(review): the rotation-kernel invocation lines and the declarations of
// is_ident12 / is_ident23 are elided from this listing; confirm them against
// the source file.
653 float one = bl1_s1();
654 float zero = bl1_s0();
655 float gamma12;
656 float sigma12;
657 float gamma23;
658 float sigma23;
659 scomplex* a1;
660 scomplex* a2;
661 scomplex* a3;
662 scomplex* g12;
663 scomplex* g23;
664 int i, j, g, k;
665 int nG, nG_app;
666 int n_iter;
667 int n_left;
668 int k_minus_1;
669 int n_fuse;
671
672 k_minus_1 = k_G - 1;
673 nG = n_A - 1;
674 n_fuse = 2;
675
676 // Use the simple variant for nG < 2*(k - 1) or k == 1.
// NOTE(review): the callee name (listing line 679) is not shown; presumably
// FLA_Apply_G_rf_asc_var1, which this function's References entry lists.
677 if ( nG < 2*k_minus_1 || k_G == 1 )
678 {
680 m_A,
681 n_A,
682 buff_G, rs_G, cs_G,
683 buff_A, rs_A, cs_A );
684 return FLA_SUCCESS;
685 }
686
687
688 // Start-up phase.
// j starts at -1, so the first pass runs zero fused iterations and only the
// single leftover rotation below.
689
690 for ( j = -1; j < k_minus_1; j += n_fuse )
691 {
692 nG_app = j + 1;
693 n_iter = nG_app;
694 n_left = 1;
695
696 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
697 {
// g12 = G(g, k) acts on columns g and g+1; g23 = G(g+1, k) acts on
// columns g+1 and g+2.
698 g12 = buff_G + (g )*rs_G + (k )*cs_G;
699 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
700 a1 = buff_A + (g )*cs_A;
701 a2 = buff_A + (g + 1)*cs_A;
702 a3 = buff_A + (g + 2)*cs_A;
703
704 gamma12 = g12->real;
705 sigma12 = g12->imag;
706 gamma23 = g23->real;
707 sigma23 = g23->imag;
708
// A rotation with gamma == 1 and sigma == 0 is treated as identity.
709 is_ident12 = ( gamma12 == one && sigma12 == zero );
710 is_ident23 = ( gamma23 == one && sigma23 == zero );
711
// No branch exists for the case where both rotations are identity, so
// nothing is applied then.
712 if ( !is_ident12 && is_ident23 )
713 {
714 // Apply only to columns 1 and 2.
715
717 &gamma12,
718 &sigma12,
719 a1, 1,
720 a2, 1 );
721 }
722 else if ( is_ident12 && !is_ident23 )
723 {
724 // Apply only to columns 2 and 3.
725
727 &gamma23,
728 &sigma23,
729 a2, 1,
730 a3, 1 );
731 }
732 else if ( !is_ident12 && !is_ident23 )
733 {
734 // Apply to all three columns.
735
737 &gamma12,
738 &sigma12,
739 &gamma23,
740 &sigma23,
741 a1, 1,
742 a2, 1,
743 a3, 1 );
744 }
745 }
746
// Always taken in this phase (n_left is fixed at 1 above): a single
// rotation G(g+1, k) at the position the loop stopped on.
747 if ( n_left == 1 )
748 {
749 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
750 a2 = buff_A + (g + 1)*cs_A;
751 a3 = buff_A + (g + 2)*cs_A;
752
753 gamma23 = g23->real;
754 sigma23 = g23->imag;
755
756 is_ident23 = ( gamma23 == one && sigma23 == zero );
757
758 if ( !is_ident23 )
760 &gamma23,
761 &sigma23,
762 a2, 1,
763 a3, 1 );
764 }
765 }
766
767 // Pipeline stage
// Continues from the j value the start-up loop ended on; each step runs k_G
// fused iterations and has no leftover rotation (n_left = 0).
768
769 for ( ; j < nG - 1; j += n_fuse )
770 {
771 nG_app = k_G;
772 n_iter = nG_app;
773 n_left = 0;
774
775 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
776 {
777 g12 = buff_G + (g )*rs_G + (k )*cs_G;
778 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
779 a1 = buff_A + (g )*cs_A;
780 a2 = buff_A + (g + 1)*cs_A;
781 a3 = buff_A + (g + 2)*cs_A;
782
783 gamma12 = g12->real;
784 sigma12 = g12->imag;
785 gamma23 = g23->real;
786 sigma23 = g23->imag;
787
788 is_ident12 = ( gamma12 == one && sigma12 == zero );
789 is_ident23 = ( gamma23 == one && sigma23 == zero );
790
791 if ( !is_ident12 && is_ident23 )
792 {
793 // Apply only to columns 1 and 2.
794
796 &gamma12,
797 &sigma12,
798 a1, 1,
799 a2, 1 );
800 }
801 else if ( is_ident12 && !is_ident23 )
802 {
803 // Apply only to columns 2 and 3.
804
806 &gamma23,
807 &sigma23,
808 a2, 1,
809 a3, 1 );
810 }
811 else if ( !is_ident12 && !is_ident23 )
812 {
813 // Apply to all three columns.
814
816 &gamma12,
817 &sigma12,
818 &gamma23,
819 &sigma23,
820 a1, 1,
821 a2, 1,
822 a3, 1 );
823 }
824 }
825 }
826
827 // Shutdown stage
// Each step starts at (g, k) = (nG - 1, j) with one unfused rotation, then
// runs k_minus_1 - j fused iterations.
828
829 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
830 {
831 g = nG - 1;
832 k = j;
833
// n_left is forced to 1, so this single-rotation block always runs.
834 n_left = 1;
835 if ( n_left == 1 )
836 {
837 g12 = buff_G + (g )*rs_G + (k )*cs_G;
838 a1 = buff_A + (g )*cs_A;
839 a2 = buff_A + (g + 1)*cs_A;
840
841 gamma12 = g12->real;
842 sigma12 = g12->imag;
843
844 is_ident12 = ( gamma12 == one && sigma12 == zero );
845
846 if ( !is_ident12 )
848 &gamma12,
849 &sigma12,
850 a1, 1,
851 a2, 1 );
// Step to the next anti-diagonal position before the fused loop.
852 ++k;
853 --g;
854 }
855
856 nG_app = k_minus_1 - j;
857 n_iter = nG_app;
858
859 for ( i = 0; i < n_iter; ++i, ++k, --g )
860 {
861 g12 = buff_G + (g )*rs_G + (k )*cs_G;
862 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
863 a1 = buff_A + (g )*cs_A;
864 a2 = buff_A + (g + 1)*cs_A;
865 a3 = buff_A + (g + 2)*cs_A;
866
867 gamma12 = g12->real;
868 sigma12 = g12->imag;
869 gamma23 = g23->real;
870 sigma23 = g23->imag;
871
872 is_ident12 = ( gamma12 == one && sigma12 == zero );
873 is_ident23 = ( gamma23 == one && sigma23 == zero );
874
875 if ( !is_ident12 && is_ident23 )
876 {
877 // Apply only to columns 1 and 2.
878
880 &gamma12,
881 &sigma12,
882 a1, 1,
883 a2, 1 );
884 }
885 else if ( is_ident12 && !is_ident23 )
886 {
887 // Apply only to columns 2 and 3.
888
890 &gamma23,
891 &sigma23,
892 a2, 1,
893 a3, 1 );
894 }
895 else if ( !is_ident12 && !is_ident23 )
896 {
897 // Apply to all three columns.
898
900 &gamma12,
901 &sigma12,
902 &gamma23,
903 &sigma23,
904 a1, 1,
905 a2, 1,
906 a3, 1 );
907 }
908 }
909 }
910
911 return FLA_SUCCESS;
912}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blc_var9().

◆ FLA_Apply_G_rf_asc_var9b()

FLA_Error FLA_Apply_G_rf_asc_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
422{
// The visible body does nothing except report FLA_SUCCESS to the caller.
// NOTE(review): listing line 423 is absent from this view; confirm what it
// does against the source file before relying on this description.
424
425 return FLA_SUCCESS;
426}

References i.

Referenced by FLA_Apply_G_rf_asm_var9b().

◆ FLA_Apply_G_rf_asd_var1()

FLA_Error FLA_Apply_G_rf_asd_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
169{
// Simple (unfused) variant: for each of the k_G columns of G in turn, sweeps
// left to right across A and applies rotation j to the adjacent column pair
// (j, j+1). Each G entry supplies gamma in its .real field and sigma in its
// .imag field.
// NOTE(review): the rotation-kernel invocation line (listing line 197) is
// elided from this view; confirm it against the source file.
170 double one = bl1_d1();
171 double zero = bl1_d0();
// One rotation per adjacent column pair of A.
172 int nG_app = n_A - 1;
173 int l, j;
174 double gamma;
175 double sigma;
176 double* a1;
177 double* a2;
178 dcomplex* g1;
179 dcomplex* g11;
180
// g1 tracks the top of the current column of G.
181 g1 = buff_G;
182
183 for ( l = 0; l < k_G; ++l )
184 {
// Restart at the first two columns of A for every column of G.
185 a1 = buff_A;
186 a2 = buff_A + cs_A;
187 g11 = g1;
188
189 for ( j = 0; j < nG_app; ++j )
190 {
191 gamma = g11->real;
192 sigma = g11->imag;
193
194 // Skip the current iteration if the rotation is identity.
195 if ( gamma != one || sigma != zero )
196 {
198 &gamma,
199 &sigma,
200 a1, 1,
201 a2, 1 );
202 }
203
// Slide the column pair one column to the right and move down one row of G.
204 a1 += cs_A;
205 a2 += cs_A;
206 g11 += rs_G;
207 }
208
// Advance to the next column of G.
209 g1 += cs_G;
210 }
211
212 return FLA_SUCCESS;
213}
double bl1_d0(void)
Definition bl1_constants.c:118
double bl1_d1(void)
Definition bl1_constants.c:54
Definition blis_type_defs.h:138
double real
Definition blis_type_defs.h:139

References bl1_d0(), bl1_d1(), i, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_bld_var1().

◆ FLA_Apply_G_rf_asd_var2()

FLA_Error FLA_Apply_G_rf_asd_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
231{
// Applies the rotations stored in G to adjacent columns of A one at a time,
// visiting G along anti-diagonals: within each outer step, k increases while
// g decreases. Rotation G(g, k) is applied to columns g and g+1 of A. Each G
// entry supplies gamma in its .real field and sigma in its .imag field.
// NOTE(review): the rotation-kernel invocation lines are elided from this
// listing; confirm them against the source file.
232 double one = bl1_d1();
233 double zero = bl1_d0();
234 double gamma;
235 double sigma;
236 double* a1;
237 double* a2;
238 dcomplex* g11;
239 int j, g, k;
240 int nG, nG_app;
241 int k_minus_1;
242
243 k_minus_1 = k_G - 1;
244 nG = n_A - 1;
245
246 // Use the simple variant for nG < (k - 1) or k == 1.
// NOTE(review): the callee name (listing line 249) is not shown; presumably
// FLA_Apply_G_rf_asd_var1, which this function's References entry lists.
247 if ( nG < k_minus_1 || k_G == 1 )
248 {
250 m_A,
251 n_A,
252 buff_G, rs_G, cs_G,
253 buff_A, rs_A, cs_A );
254 return FLA_SUCCESS;
255 }
256
257
258 // Start-up phase.
// Step j applies j + 1 rotations: (g, k) = (j, 0), (j-1, 1), ..., (0, j).
259
260 for ( j = 0; j < k_minus_1; ++j )
261 {
262 nG_app = j + 1;
263
264 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
265 {
266 g11 = buff_G + (g )*rs_G + (k )*cs_G;
267 a1 = buff_A + (g )*cs_A;
268 a2 = buff_A + (g + 1)*cs_A;
269
270 gamma = g11->real;
271 sigma = g11->imag;
272
273 // Skip the current iteration if the rotation is identity.
274 if ( gamma == one && sigma == zero ) continue;
275
277 &gamma,
278 &sigma,
279 a1, 1,
280 a2, 1 );
281 }
282 }
283
284 // Pipeline stage
// Every step applies a full set of k_G rotations, starting at (g, k) = (j, 0).
285
286 for ( j = k_minus_1; j < nG; ++j )
287 {
288 nG_app = k_G;
289
290 for ( k = 0, g = j; k < nG_app; ++k, --g )
291 {
292 g11 = buff_G + (g )*rs_G + (k )*cs_G;
293 a1 = buff_A + (g )*cs_A;
294 a2 = buff_A + (g + 1)*cs_A;
295
296 gamma = g11->real;
297 sigma = g11->imag;
298
299 // Skip the current iteration if the rotation is identity.
300 if ( gamma == one && sigma == zero ) continue;
301
303 &gamma,
304 &sigma,
305 a1, 1,
306 a2, 1 );
307 }
308 }
309
310 // Shutdown stage
// Step j applies the remaining nG - j rotations, starting at
// (g, k) = (nG - 1, k_G - nG_app) and ending at column k_G - 1 of G.
311
312 for ( j = nG - k_minus_1; j < nG; ++j )
313 {
314 nG_app = nG - j;
315
316 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
317 {
318 g11 = buff_G + (g )*rs_G + (k )*cs_G;
319 a1 = buff_A + (g )*cs_A;
320 a2 = buff_A + (g + 1)*cs_A;
321
322 gamma = g11->real;
323 sigma = g11->imag;
324
325 // Skip the current iteration if the rotation is identity.
326 if ( gamma == one && sigma == zero ) continue;
327
329 &gamma,
330 &sigma,
331 a1, 1,
332 a2, 1 );
333 }
334 }
335
336 return FLA_SUCCESS;
337}
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:164

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), and i.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bld_var2().

◆ FLA_Apply_G_rf_asd_var3()

FLA_Error FLA_Apply_G_rf_asd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
571{
// Applies the rotations stored in G to adjacent columns of A, handling up to
// four rotations per fused step on four neighbouring columns a1..a4:
//   g23_k1 = G(g,   k)   on columns (a2, a3)
//   g34_k1 = G(g+1, k)   on columns (a3, a4)
//   g12_k2 = G(g-1, k+1) on columns (a1, a2)
//   g23_k2 = G(g,   k+1) on columns (a2, a3)
// Each G entry supplies gamma in its .real field and sigma in its .imag field.
// NOTE(review): this listing elides the declarations of the g*_k* pointers
// and is_ident* flags, the statements that compute is_ident* / has_ident, and
// every rotation-kernel invocation line; confirm them against the source file.
572 double one = bl1_d1();
573 double zero = bl1_d0();
574 double gamma23_k1;
575 double sigma23_k1;
576 double gamma34_k1;
577 double sigma34_k1;
578 double gamma12_k2;
579 double sigma12_k2;
580 double gamma23_k2;
581 double sigma23_k2;
582 double* a1;
583 double* a2;
584 double* a3;
585 double* a4;
590 int i, j, g, k;
591 int nG, nG_app;
592 int n_iter;
593 int n_left;
594 int k_minus_1;
595 int n_fuse;
596 int k_fuse;
599 int has_ident;
600
601 k_minus_1 = k_G - 1;
602 nG = n_A - 1;
603 n_fuse = 2;
604 k_fuse = 2;
605
606 // Use the simple variant for nG < 2*(k - 1) or k == 1.
// NOTE(review): the callee name (listing line 609) is not shown; presumably
// FLA_Apply_G_rf_asd_var1, which this function's References entry lists.
607 if ( nG < 2*k_minus_1 || k_G == 1 )
608 {
610 m_A,
611 n_A,
612 buff_G, rs_G, cs_G,
613 buff_A, rs_A, cs_A );
614 return FLA_SUCCESS;
615 }
616
617
618 // Start-up phase.
// j starts at -1 and advances by n_fuse; the first pass runs zero fused
// iterations and only the single leftover rotation below.
619
620 for ( j = -1; j < k_minus_1; j += n_fuse )
621 {
622 nG_app = j + 2;
623 n_iter = nG_app / k_fuse;
624 n_left = 1;
625
626 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
627 {
628 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
629 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
630 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
631 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
632 a1 = buff_A + (g - 1)*cs_A;
633 a2 = buff_A + (g )*cs_A;
634 a3 = buff_A + (g + 1)*cs_A;
635 a4 = buff_A + (g + 2)*cs_A;
636
637 gamma23_k1 = g23_k1->real;
638 sigma23_k1 = g23_k1->imag;
639 gamma34_k1 = g34_k1->real;
640 sigma34_k1 = g34_k1->imag;
641 gamma12_k2 = g12_k2->real;
642 sigma12_k2 = g12_k2->imag;
643 gamma23_k2 = g23_k2->real;
644 sigma23_k2 = g23_k2->imag;
645
// NOTE(review): listing lines 646-651, which presumably set the is_ident*
// flags and has_ident, are not shown here.
652
653 if ( has_ident )
654 {
655 // Apply to pairs of columns as needed.
656
657 if ( !is_ident23_k1 )
659 &gamma23_k1,
660 &sigma23_k1,
661 a2, 1,
662 a3, 1 );
663
664 if ( !is_ident34_k1 )
666 &gamma34_k1,
667 &sigma34_k1,
668 a3, 1,
669 a4, 1 );
670
671 if ( !is_ident12_k2 )
673 &gamma12_k2,
674 &sigma12_k2,
675 a1, 1,
676 a2, 1 );
677
678 if ( !is_ident23_k2 )
680 &gamma23_k2,
681 &sigma23_k2,
682 a2, 1,
683 a3, 1 );
684 }
685 else
686 {
687 // Apply to all four columns.
688
690 &gamma23_k1,
691 &sigma23_k1,
692 &gamma34_k1,
693 &sigma34_k1,
694 &gamma12_k2,
695 &sigma12_k2,
696 &gamma23_k2,
697 &sigma23_k2,
698 a1, 1,
699 a2, 1,
700 a3, 1,
701 a4, 1 );
702 }
703 }
704
// Always taken in this phase (n_left is fixed at 1 above): a single
// rotation G(g+1, k) on columns (a3, a4).
705 if ( n_left == 1 )
706 {
707 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
708 a3 = buff_A + (g + 1)*cs_A;
709 a4 = buff_A + (g + 2)*cs_A;
710
711 gamma34_k1 = g34_k1->real;
712 sigma34_k1 = g34_k1->imag;
713
715
716 if ( !is_ident34_k1 )
718 &gamma34_k1,
719 &sigma34_k1,
720 a3, 1,
721 a4, 1 );
722 }
723 }
724
725 // Pipeline stage
// Continues from the j value the start-up loop ended on; each step covers
// k_G columns of G, two at a time, with a leftover column when k_G is odd.
726
727 for ( ; j < nG - 1; j += n_fuse )
728 {
729 nG_app = k_G;
730 n_iter = nG_app / k_fuse;
731 n_left = nG_app % k_fuse;
732
733 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
734 {
735 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
736 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
737 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
738 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
739 a1 = buff_A + (g - 1)*cs_A;
740 a2 = buff_A + (g )*cs_A;
741 a3 = buff_A + (g + 1)*cs_A;
742 a4 = buff_A + (g + 2)*cs_A;
743
744 gamma23_k1 = g23_k1->real;
745 sigma23_k1 = g23_k1->imag;
746 gamma34_k1 = g34_k1->real;
747 sigma34_k1 = g34_k1->imag;
748 gamma12_k2 = g12_k2->real;
749 sigma12_k2 = g12_k2->imag;
750 gamma23_k2 = g23_k2->real;
751 sigma23_k2 = g23_k2->imag;
752
759
760 if ( has_ident )
761 {
762 // Apply to pairs of columns as needed.
763
764 if ( !is_ident23_k1 )
766 &gamma23_k1,
767 &sigma23_k1,
768 a2, 1,
769 a3, 1 );
770
771 if ( !is_ident34_k1 )
773 &gamma34_k1,
774 &sigma34_k1,
775 a3, 1,
776 a4, 1 );
777
778 if ( !is_ident12_k2 )
780 &gamma12_k2,
781 &sigma12_k2,
782 a1, 1,
783 a2, 1 );
784
785 if ( !is_ident23_k2 )
787 &gamma23_k2,
788 &sigma23_k2,
789 a2, 1,
790 a3, 1 );
791 }
792 else
793 {
794 // Apply to all four columns.
795
797 &gamma23_k1,
798 &sigma23_k1,
799 &gamma34_k1,
800 &sigma34_k1,
801 &gamma12_k2,
802 &sigma12_k2,
803 &gamma23_k2,
804 &sigma23_k2,
805 a1, 1,
806 a2, 1,
807 a3, 1,
808 a4, 1 );
809 }
810 }
811
// Leftover column of G: two rotations, G(g, k) and G(g+1, k), on columns
// a2, a3, a4.
812 if ( n_left == 1 )
813 {
814 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
815 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
816 a2 = buff_A + (g )*cs_A;
817 a3 = buff_A + (g + 1)*cs_A;
818 a4 = buff_A + (g + 2)*cs_A;
819
820 gamma23_k1 = g23_k1->real;
821 sigma23_k1 = g23_k1->imag;
822 gamma34_k1 = g34_k1->real;
823 sigma34_k1 = g34_k1->imag;
824
// NOTE(review): the condition guarding the first branch below (listing
// line 828) is not shown in this view.
827
829 {
831 &gamma23_k1,
832 &sigma23_k1,
833 a2, 1,
834 a3, 1 );
835 }
836 else if ( is_ident23_k1 && !is_ident34_k1 )
837 {
839 &gamma34_k1,
840 &sigma34_k1,
841 a3, 1,
842 a4, 1 );
843 }
844 else
845 {
847 &gamma23_k1,
848 &sigma23_k1,
849 &gamma34_k1,
850 &sigma34_k1,
851 a2, 1,
852 a3, 1,
853 a4, 1 );
854 }
855 }
856 }
857
858 // Shutdown stage
// Each step starts at (g, k) = (nG - 1, j) with one unfused rotation, then
// covers the remaining k_minus_1 - j columns of G two at a time.
859
860 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
861 {
862 g = nG - 1;
863 k = j;
864
865 //n_left = 1;
866 //if ( n_left == 1 )
867 {
868 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
869 a2 = buff_A + (g )*cs_A;
870 a3 = buff_A + (g + 1)*cs_A;
871
872 gamma23_k1 = g23_k1->real;
873 sigma23_k1 = g23_k1->imag;
874
876
877 if ( !is_ident23_k1 )
879 &gamma23_k1,
880 &sigma23_k1,
881 a2, 1,
882 a3, 1 );
// Step to the next anti-diagonal position before the fused loop.
883 ++k;
884 --g;
885 }
886
887 nG_app = k_minus_1 - j;
888 n_iter = nG_app / k_fuse;
889 n_left = nG_app % k_fuse;
890
891 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
892 {
893 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
894 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
895 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
896 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
897 a1 = buff_A + (g - 1)*cs_A;
898 a2 = buff_A + (g )*cs_A;
899 a3 = buff_A + (g + 1)*cs_A;
900 a4 = buff_A + (g + 2)*cs_A;
901
902 gamma23_k1 = g23_k1->real;
903 sigma23_k1 = g23_k1->imag;
904 gamma34_k1 = g34_k1->real;
905 sigma34_k1 = g34_k1->imag;
906 gamma12_k2 = g12_k2->real;
907 sigma12_k2 = g12_k2->imag;
908 gamma23_k2 = g23_k2->real;
909 sigma23_k2 = g23_k2->imag;
910
917
918 if ( has_ident )
919 {
920 // Apply to pairs of columns as needed.
921
922 if ( !is_ident23_k1 )
924 &gamma23_k1,
925 &sigma23_k1,
926 a2, 1,
927 a3, 1 );
928
929 if ( !is_ident34_k1 )
931 &gamma34_k1,
932 &sigma34_k1,
933 a3, 1,
934 a4, 1 );
935
936 if ( !is_ident12_k2 )
938 &gamma12_k2,
939 &sigma12_k2,
940 a1, 1,
941 a2, 1 );
942
943 if ( !is_ident23_k2 )
945 &gamma23_k2,
946 &sigma23_k2,
947 a2, 1,
948 a3, 1 );
949 }
950 else
951 {
952 // Apply to all four columns.
953
955 &gamma23_k1,
956 &sigma23_k1,
957 &gamma34_k1,
958 &sigma34_k1,
959 &gamma12_k2,
960 &sigma12_k2,
961 &gamma23_k2,
962 &sigma23_k2,
963 a1, 1,
964 a2, 1,
965 a3, 1,
966 a4, 1 );
967 }
968 }
969
// Leftover column of G: two rotations, G(g, k) and G(g+1, k), on columns
// a2, a3, a4.
970 if ( n_left == 1 )
971 {
972 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
973 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
974 a2 = buff_A + (g )*cs_A;
975 a3 = buff_A + (g + 1)*cs_A;
976 a4 = buff_A + (g + 2)*cs_A;
977
978 gamma23_k1 = g23_k1->real;
979 sigma23_k1 = g23_k1->imag;
980 gamma34_k1 = g34_k1->real;
981 sigma34_k1 = g34_k1->imag;
982
// NOTE(review): the condition guarding the first branch below (listing
// line 986) is not shown in this view.
985
987 {
989 &gamma23_k1,
990 &sigma23_k1,
991 a2, 1,
992 a3, 1 );
993 }
994 else if ( is_ident23_k1 && !is_ident34_k1 )
995 {
997 &gamma34_k1,
998 &sigma34_k1,
999 a3, 1,
1000 a4, 1 );
1001 }
1002 else
1003 {
1005 &gamma23_k1,
1006 &sigma23_k1,
1007 &gamma34_k1,
1008 &sigma34_k1,
1009 a2, 1,
1010 a3, 1,
1011 a4, 1 );
1012 }
1013 }
1014 }
1015
1016 return FLA_SUCCESS;
1017}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_asd_var3b()

FLA_Error FLA_Apply_G_rf_asd_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
138{
139 double one = bl1_d1();
140 double zero = bl1_d0();
141 double gamma23_k1;
142 double sigma23_k1;
143 double gamma34_k1;
144 double sigma34_k1;
145 double gamma12_k2;
146 double sigma12_k2;
147 double gamma23_k2;
148 double sigma23_k2;
149 double* a1;
150 double* a2;
151 double* a3;
152 double* a4;
157 int i, j, g, k;
158 int nG, nG_app;
159 int n_iter;
160 int n_left;
161 int k_minus_1;
162 int n_fuse;
163 int k_fuse;
166 int has_ident;
167 int m_app;
168
169
170 k_minus_1 = k_G - 1;
171 nG = n_A - 1;
172 n_fuse = 2;
173 k_fuse = 2;
174
175 // Use the simple variant for nG < 2*(k - 1) or k == 1.
176 if ( nG < 2*k_minus_1 || k_G == 1 )
177 {
178 FLA_Apply_G_rf_asd_var1( k_G,
179 m_A,
180 n_A,
181 buff_G, rs_G, cs_G,
182 buff_A, rs_A, cs_A );
183 return FLA_SUCCESS;
184 }
185
186
187 // Start-up phase.
188
189 for ( j = -1; j < k_minus_1; j += n_fuse )
190 {
191 nG_app = j + 2;
192 n_iter = nG_app / k_fuse;
193 //n_iter = nG_app % k_fuse;
194 n_left = 1;
195
196 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
197 {
198 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
199 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
200 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
201 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
202 a1 = buff_A + (g - 1)*cs_A;
203 a2 = buff_A + (g )*cs_A;
204 a3 = buff_A + (g + 1)*cs_A;
205 a4 = buff_A + (g + 2)*cs_A;
206
207 gamma23_k1 = g23_k1->real;
208 sigma23_k1 = g23_k1->imag;
209 gamma34_k1 = g34_k1->real;
210 sigma34_k1 = g34_k1->imag;
211 gamma12_k2 = g12_k2->real;
212 sigma12_k2 = g12_k2->imag;
213 gamma23_k2 = g23_k2->real;
214 sigma23_k2 = g23_k2->imag;
215
222
223 m_app = min( i_k + 3 + j - iTL, m_A );
224 m_app = max( m_app, 0 );
225
226 if ( has_ident )
227 {
228 // Apply to pairs of columns as needed.
229
230 if ( !is_ident23_k1 )
232 &gamma23_k1,
233 &sigma23_k1,
234 a2, 1,
235 a3, 1 );
236
237 if ( !is_ident34_k1 )
239 &gamma34_k1,
240 &sigma34_k1,
241 a3, 1,
242 a4, 1 );
243
244 if ( !is_ident12_k2 )
246 &gamma12_k2,
247 &sigma12_k2,
248 a1, 1,
249 a2, 1 );
250
251 if ( !is_ident23_k2 )
253 &gamma23_k2,
254 &sigma23_k2,
255 a2, 1,
256 a3, 1 );
257 }
258 else
259 {
260 // Apply to all four columns.
261
263 &gamma23_k1,
264 &sigma23_k1,
265 &gamma34_k1,
266 &sigma34_k1,
267 &gamma12_k2,
268 &sigma12_k2,
269 &gamma23_k2,
270 &sigma23_k2,
271 a1, 1,
272 a2, 1,
273 a3, 1,
274 a4, 1 );
275 }
276 }
277
278 if ( n_left == 1 )
279 {
280 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
281 a3 = buff_A + (g + 1)*cs_A;
282 a4 = buff_A + (g + 2)*cs_A;
283
284 gamma34_k1 = g34_k1->real;
285 sigma34_k1 = g34_k1->imag;
286
288
289 m_app = min( i_k + 3 + j - iTL, m_A );
290 m_app = max( m_app, 0 );
291
292 if ( !is_ident34_k1 )
294 &gamma34_k1,
295 &sigma34_k1,
296 a3, 1,
297 a4, 1 );
298 }
299 }
300
301 // Pipeline stage
302
303 for ( ; j < nG - 1; j += n_fuse )
304 {
305 nG_app = k_G;
306 n_iter = nG_app / k_fuse;
307 n_left = nG_app % k_fuse;
308
309 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
310 {
311 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
312 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
313 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
314 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
315 a1 = buff_A + (g - 1)*cs_A;
316 a2 = buff_A + (g )*cs_A;
317 a3 = buff_A + (g + 1)*cs_A;
318 a4 = buff_A + (g + 2)*cs_A;
319
320 gamma23_k1 = g23_k1->real;
321 sigma23_k1 = g23_k1->imag;
322 gamma34_k1 = g34_k1->real;
323 sigma34_k1 = g34_k1->imag;
324 gamma12_k2 = g12_k2->real;
325 sigma12_k2 = g12_k2->imag;
326 gamma23_k2 = g23_k2->real;
327 sigma23_k2 = g23_k2->imag;
328
335
336 m_app = min( i_k + 3 + j - iTL, m_A );
337 m_app = max( m_app, 0 );
338
339 if ( has_ident )
340 {
341 // Apply to pairs of columns as needed.
342
343 if ( !is_ident23_k1 )
345 &gamma23_k1,
346 &sigma23_k1,
347 a2, 1,
348 a3, 1 );
349
350 if ( !is_ident34_k1 )
352 &gamma34_k1,
353 &sigma34_k1,
354 a3, 1,
355 a4, 1 );
356
357 if ( !is_ident12_k2 )
359 &gamma12_k2,
360 &sigma12_k2,
361 a1, 1,
362 a2, 1 );
363
364 if ( !is_ident23_k2 )
366 &gamma23_k2,
367 &sigma23_k2,
368 a2, 1,
369 a3, 1 );
370 }
371 else
372 {
373 // Apply to all four columns.
374
376 &gamma23_k1,
377 &sigma23_k1,
378 &gamma34_k1,
379 &sigma34_k1,
380 &gamma12_k2,
381 &sigma12_k2,
382 &gamma23_k2,
383 &sigma23_k2,
384 a1, 1,
385 a2, 1,
386 a3, 1,
387 a4, 1 );
388 }
389 }
390
391 if ( n_left == 1 )
392 {
393 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
394 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
395 a2 = buff_A + (g )*cs_A;
396 a3 = buff_A + (g + 1)*cs_A;
397 a4 = buff_A + (g + 2)*cs_A;
398
399 gamma23_k1 = g23_k1->real;
400 sigma23_k1 = g23_k1->imag;
401 gamma34_k1 = g34_k1->real;
402 sigma34_k1 = g34_k1->imag;
403
406
407 m_app = min( i_k + 3 + j - iTL, m_A );
408 m_app = max( m_app, 0 );
409
411 {
413 &gamma23_k1,
414 &sigma23_k1,
415 a2, 1,
416 a3, 1 );
417 }
418 else if ( is_ident23_k1 && !is_ident34_k1 )
419 {
421 &gamma34_k1,
422 &sigma34_k1,
423 a3, 1,
424 a4, 1 );
425 }
426 else
427 {
429 &gamma23_k1,
430 &sigma23_k1,
431 &gamma34_k1,
432 &sigma34_k1,
433 a2, 1,
434 a3, 1,
435 a4, 1 );
436 }
437 }
438 }
439
440 // Shutdown stage
441
442 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
443 {
444 g = nG - 1;
445 k = j;
446
447 //n_left = 1;
448 //if ( n_left == 1 )
449 {
450 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
451 a2 = buff_A + (g )*cs_A;
452 a3 = buff_A + (g + 1)*cs_A;
453
454 gamma23_k1 = g23_k1->real;
455 sigma23_k1 = g23_k1->imag;
456
458
459 m_app = m_A;
460
461 if ( !is_ident23_k1 )
463 &gamma23_k1,
464 &sigma23_k1,
465 a2, 1,
466 a3, 1 );
467 ++k;
468 --g;
469 }
470
471 nG_app = k_minus_1 - j;
472 n_iter = nG_app / k_fuse;
473 n_left = nG_app % k_fuse;
474
475 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
476 {
477 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
478 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
479 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
480 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
481 a1 = buff_A + (g - 1)*cs_A;
482 a2 = buff_A + (g )*cs_A;
483 a3 = buff_A + (g + 1)*cs_A;
484 a4 = buff_A + (g + 2)*cs_A;
485
486 gamma23_k1 = g23_k1->real;
487 sigma23_k1 = g23_k1->imag;
488 gamma34_k1 = g34_k1->real;
489 sigma34_k1 = g34_k1->imag;
490 gamma12_k2 = g12_k2->real;
491 sigma12_k2 = g12_k2->imag;
492 gamma23_k2 = g23_k2->real;
493 sigma23_k2 = g23_k2->imag;
494
501
502 m_app = m_A;
503
504 if ( has_ident )
505 {
506 // Apply to pairs of columns as needed.
507
508 if ( !is_ident23_k1 )
510 &gamma23_k1,
511 &sigma23_k1,
512 a2, 1,
513 a3, 1 );
514
515 if ( !is_ident34_k1 )
517 &gamma34_k1,
518 &sigma34_k1,
519 a3, 1,
520 a4, 1 );
521
522 if ( !is_ident12_k2 )
524 &gamma12_k2,
525 &sigma12_k2,
526 a1, 1,
527 a2, 1 );
528
529 if ( !is_ident23_k2 )
531 &gamma23_k2,
532 &sigma23_k2,
533 a2, 1,
534 a3, 1 );
535 }
536 else
537 {
538 // Apply to all four columns.
539
541 &gamma23_k1,
542 &sigma23_k1,
543 &gamma34_k1,
544 &sigma34_k1,
545 &gamma12_k2,
546 &sigma12_k2,
547 &gamma23_k2,
548 &sigma23_k2,
549 a1, 1,
550 a2, 1,
551 a3, 1,
552 a4, 1 );
553 }
554 }
555
556 if ( n_left == 1 )
557 {
558 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
559 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
560 a2 = buff_A + (g )*cs_A;
561 a3 = buff_A + (g + 1)*cs_A;
562 a4 = buff_A + (g + 2)*cs_A;
563
564 gamma23_k1 = g23_k1->real;
565 sigma23_k1 = g23_k1->imag;
566 gamma34_k1 = g34_k1->real;
567 sigma34_k1 = g34_k1->imag;
568
571
572 m_app = m_A;
573
575 {
577 &gamma23_k1,
578 &sigma23_k1,
579 a2, 1,
580 a3, 1 );
581 }
582 else if ( is_ident23_k1 && !is_ident34_k1 )
583 {
585 &gamma34_k1,
586 &sigma34_k1,
587 a3, 1,
588 a4, 1 );
589 }
590 else
591 {
593 &gamma23_k1,
594 &sigma23_k1,
595 &gamma34_k1,
596 &sigma34_k1,
597 a2, 1,
598 a3, 1,
599 a4, 1 );
600 }
601 }
602 }
603
604 return FLA_SUCCESS;
605}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bld_var3b().

◆ FLA_Apply_G_rf_asd_var4()

FLA_Error FLA_Apply_G_rf_asd_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var5()

FLA_Error FLA_Apply_G_rf_asd_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var5b()

FLA_Error FLA_Apply_G_rf_asd_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var6()

FLA_Error FLA_Apply_G_rf_asd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
399{
400 double one = bl1_d1();
401 double zero = bl1_d0();
402 double gamma12;
403 double sigma12;
404 double gamma23;
405 double sigma23;
406 double* a1;
407 double* a2;
408 double* a3;
409 dcomplex* g12;
410 dcomplex* g23;
411 int i, j, g, k;
412 int nG, nG_app;
413 int n_iter;
414 int n_left;
415 int k_minus_1;
416 int n_fuse;
418
419 k_minus_1 = k_G - 1;
420 nG = n_A - 1;
421 n_fuse = 2;
422
423 // Use the simple variant for nG < (k - 1) or k == 1.
424 if ( nG < k_minus_1 || k_G == 1 )
425 {
426 FLA_Apply_G_rf_asd_var1( k_G,
427 m_A,
428 n_A,
429 buff_G, rs_G, cs_G,
430 buff_A, rs_A, cs_A );
431 return FLA_SUCCESS;
432 }
433
434
435 // Start-up phase.
436
437 for ( j = 0; j < k_minus_1; ++j )
438 {
439 nG_app = j + 1;
440 n_iter = nG_app / n_fuse;
441 n_left = nG_app % n_fuse;
442
443 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
444 {
445 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
446 g23 = buff_G + (g )*rs_G + (k )*cs_G;
447 a1 = buff_A + (g - 1)*cs_A;
448 a2 = buff_A + (g )*cs_A;
449 a3 = buff_A + (g + 1)*cs_A;
450
451 gamma12 = g12->real;
452 sigma12 = g12->imag;
453 gamma23 = g23->real;
454 sigma23 = g23->imag;
455
456 is_ident12 = ( gamma12 == one && sigma12 == zero );
457 is_ident23 = ( gamma23 == one && sigma23 == zero );
458
459 if ( !is_ident12 && is_ident23 )
460 {
461 // Apply only to columns 1 and 2.
462
464 &gamma12,
465 &sigma12,
466 a1, 1,
467 a2, 1 );
468 }
469 else if ( is_ident12 && !is_ident23 )
470 {
471 // Apply only to columns 2 and 3.
472
474 &gamma23,
475 &sigma23,
476 a2, 1,
477 a3, 1 );
478 }
479 else if ( !is_ident12 && !is_ident23 )
480 {
481 // Apply to all three columns.
482
484 &gamma12,
485 &sigma12,
486 &gamma23,
487 &sigma23,
488 a1, 1,
489 a2, 1,
490 a3, 1 );
491 }
492 }
493
494 if ( n_left == 1 )
495 {
496 g23 = buff_G + (g )*rs_G + (k )*cs_G;
497 a2 = buff_A + (g )*cs_A;
498 a3 = buff_A + (g + 1)*cs_A;
499
500 gamma23 = g23->real;
501 sigma23 = g23->imag;
502
503 is_ident23 = ( gamma23 == one && sigma23 == zero );
504
505 if ( !is_ident23 )
507 &gamma23,
508 &sigma23,
509 a2, 1,
510 a3, 1 );
511 }
512 }
513
514 // Pipeline stage
515
516 for ( j = k_minus_1; j < nG; ++j )
517 {
518 nG_app = k_G;
519 n_iter = nG_app / n_fuse;
520 n_left = nG_app % n_fuse;
521
522 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
523 {
524 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
525 g23 = buff_G + (g )*rs_G + (k )*cs_G;
526 a1 = buff_A + (g - 1)*cs_A;
527 a2 = buff_A + (g )*cs_A;
528 a3 = buff_A + (g + 1)*cs_A;
529
530 gamma12 = g12->real;
531 sigma12 = g12->imag;
532 gamma23 = g23->real;
533 sigma23 = g23->imag;
534
535 is_ident12 = ( gamma12 == one && sigma12 == zero );
536 is_ident23 = ( gamma23 == one && sigma23 == zero );
537
538 if ( !is_ident12 && is_ident23 )
539 {
540 // Apply only to columns 1 and 2.
541
543 &gamma12,
544 &sigma12,
545 a1, 1,
546 a2, 1 );
547 }
548 else if ( is_ident12 && !is_ident23 )
549 {
550 // Apply only to columns 2 and 3.
551
553 &gamma23,
554 &sigma23,
555 a2, 1,
556 a3, 1 );
557 }
558 else if ( !is_ident12 && !is_ident23 )
559 {
560 // Apply to all three columns.
561
563 &gamma12,
564 &sigma12,
565 &gamma23,
566 &sigma23,
567 a1, 1,
568 a2, 1,
569 a3, 1 );
570 }
571 }
572
573 if ( n_left == 1 )
574 {
575 g23 = buff_G + (g )*rs_G + (k )*cs_G;
576 a2 = buff_A + (g )*cs_A;
577 a3 = buff_A + (g + 1)*cs_A;
578
579 gamma23 = g23->real;
580 sigma23 = g23->imag;
581
582 is_ident23 = ( gamma23 == one && sigma23 == zero );
583
584 if ( !is_ident23 )
586 &gamma23,
587 &sigma23,
588 a2, 1,
589 a3, 1 );
590 }
591 }
592
593 // Shutdown stage
594
595 for ( j = 1; j < k_G; ++j )
596 {
597 nG_app = k_G - j;
598 n_iter = nG_app / n_fuse;
599 n_left = nG_app % n_fuse;
600
601 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
602 {
603 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
604 g23 = buff_G + (g )*rs_G + (k )*cs_G;
605 a1 = buff_A + (g - 1)*cs_A;
606 a2 = buff_A + (g )*cs_A;
607 a3 = buff_A + (g + 1)*cs_A;
608
609 gamma12 = g12->real;
610 sigma12 = g12->imag;
611 gamma23 = g23->real;
612 sigma23 = g23->imag;
613
614 is_ident12 = ( gamma12 == one && sigma12 == zero );
615 is_ident23 = ( gamma23 == one && sigma23 == zero );
616
617 if ( !is_ident12 && is_ident23 )
618 {
619 // Apply only to columns 1 and 2.
620
622 &gamma12,
623 &sigma12,
624 a1, 1,
625 a2, 1 );
626 }
627 else if ( is_ident12 && !is_ident23 )
628 {
629 // Apply only to columns 2 and 3.
630
632 &gamma23,
633 &sigma23,
634 a2, 1,
635 a3, 1 );
636 }
637 else if ( !is_ident12 && !is_ident23 )
638 {
639 // Apply to all three columns.
640
642 &gamma12,
643 &sigma12,
644 &gamma23,
645 &sigma23,
646 a1, 1,
647 a2, 1,
648 a3, 1 );
649 }
650 }
651
652 if ( n_left == 1 )
653 {
654 g23 = buff_G + (g )*rs_G + (k )*cs_G;
655 a2 = buff_A + (g )*cs_A;
656 a3 = buff_A + (g + 1)*cs_A;
657
658 gamma23 = g23->real;
659 sigma23 = g23->imag;
660
661 is_ident23 = ( gamma23 == one && sigma23 == zero );
662
663 if ( !is_ident23 )
665 &gamma23,
666 &sigma23,
667 a2, 1,
668 a3, 1 );
669 }
670 }
671
672 return FLA_SUCCESS;
673}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bld_var6().

◆ FLA_Apply_G_rf_asd_var6b()

FLA_Error FLA_Apply_G_rf_asd_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
138{
139 double one = bl1_d1();
140 double zero = bl1_d0();
141 double gamma12;
142 double sigma12;
143 double gamma23;
144 double sigma23;
145 double* a1;
146 double* a2;
147 double* a3;
148 dcomplex* g12;
149 dcomplex* g23;
150 int i, j, g, k;
151 int nG, nG_app;
152 int n_iter;
153 int n_left;
154 int k_minus_1;
155 int n_fuse;
157 int m_app;
158
159
160 k_minus_1 = k_G - 1;
161 nG = n_A - 1;
162 n_fuse = 2;
163
164 // Use the simple variant for nG < (k - 1) or k == 1.
165 if ( nG < k_minus_1 || k_G == 1 )
166 {
167 FLA_Apply_G_rf_asd_var1( k_G,
168 m_A,
169 n_A,
170 buff_G, rs_G, cs_G,
171 buff_A, rs_A, cs_A );
172 return FLA_SUCCESS;
173 }
174
175
176 // Start-up phase.
177
178 for ( j = 0; j < k_minus_1; ++j )
179 {
180 nG_app = j + 1;
181 n_iter = nG_app / n_fuse;
182 n_left = nG_app % n_fuse;
183
184 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
185 {
186 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
187 g23 = buff_G + (g )*rs_G + (k )*cs_G;
188 a1 = buff_A + (g - 1)*cs_A;
189 a2 = buff_A + (g )*cs_A;
190 a3 = buff_A + (g + 1)*cs_A;
191
192 gamma12 = g12->real;
193 sigma12 = g12->imag;
194 gamma23 = g23->real;
195 sigma23 = g23->imag;
196
197 is_ident12 = ( gamma12 == one && sigma12 == zero );
198 is_ident23 = ( gamma23 == one && sigma23 == zero );
199
200 m_app = min( i_k + 2 + j - iTL, m_A );
201 m_app = max( m_app, 0 );
202
203 if ( !is_ident12 && is_ident23 )
204 {
205 // Apply only to columns 1 and 2.
206
208 &gamma12,
209 &sigma12,
210 a1, 1,
211 a2, 1 );
212 }
213 else if ( is_ident12 && !is_ident23 )
214 {
215 // Apply only to columns 2 and 3.
216
218 &gamma23,
219 &sigma23,
220 a2, 1,
221 a3, 1 );
222 }
223 else if ( !is_ident12 && !is_ident23 )
224 {
225 // Apply to all three columns.
226
228 &gamma12,
229 &sigma12,
230 &gamma23,
231 &sigma23,
232 a1, 1,
233 a2, 1,
234 a3, 1 );
235 }
236 }
237 if ( n_left == 1 )
238 {
239 g23 = buff_G + (g )*rs_G + (k )*cs_G;
240 a2 = buff_A + (g )*cs_A;
241 a3 = buff_A + (g + 1)*cs_A;
242
243 gamma23 = g23->real;
244 sigma23 = g23->imag;
245
246 is_ident23 = ( gamma23 == one && sigma23 == zero );
247
248 m_app = min( i_k + 2 + j - iTL, m_A );
249 m_app = max( m_app, 0 );
250
251 if ( !is_ident23 )
253 &gamma23,
254 &sigma23,
255 a2, 1,
256 a3, 1 );
257 }
258 }
259
260 // Pipeline stage
261
262 for ( j = k_minus_1; j < nG; ++j )
263 {
264 nG_app = k_G;
265 n_iter = nG_app / n_fuse;
266 n_left = nG_app % n_fuse;
267
268 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
269 {
270 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
271 g23 = buff_G + (g )*rs_G + (k )*cs_G;
272 a1 = buff_A + (g - 1)*cs_A;
273 a2 = buff_A + (g )*cs_A;
274 a3 = buff_A + (g + 1)*cs_A;
275
276 gamma12 = g12->real;
277 sigma12 = g12->imag;
278 gamma23 = g23->real;
279 sigma23 = g23->imag;
280
281 is_ident12 = ( gamma12 == one && sigma12 == zero );
282 is_ident23 = ( gamma23 == one && sigma23 == zero );
283
284 m_app = min( i_k + 2 + j - iTL, m_A );
285 m_app = max( m_app, 0 );
286
287 if ( !is_ident12 && is_ident23 )
288 {
289 // Apply only to columns 1 and 2.
290
292 &gamma12,
293 &sigma12,
294 a1, 1,
295 a2, 1 );
296 }
297 else if ( is_ident12 && !is_ident23 )
298 {
299 // Apply only to columns 2 and 3.
300
302 &gamma23,
303 &sigma23,
304 a2, 1,
305 a3, 1 );
306 }
307 else if ( !is_ident12 && !is_ident23 )
308 {
309 // Apply to all three columns.
310
312 &gamma12,
313 &sigma12,
314 &gamma23,
315 &sigma23,
316 a1, 1,
317 a2, 1,
318 a3, 1 );
319 }
320 }
321 if ( n_left == 1 )
322 {
323 g23 = buff_G + (g )*rs_G + (k )*cs_G;
324 a2 = buff_A + (g )*cs_A;
325 a3 = buff_A + (g + 1)*cs_A;
326
327 gamma23 = g23->real;
328 sigma23 = g23->imag;
329
330 is_ident23 = ( gamma23 == one && sigma23 == zero );
331
332 m_app = min( i_k + 2 + j - iTL, m_A );
333 m_app = max( m_app, 0 );
334
335 if ( !is_ident23 )
337 &gamma23,
338 &sigma23,
339 a2, 1,
340 a3, 1 );
341 }
342 }
343
344 // Shutdown stage
345
346 for ( j = 1; j < k_G; ++j )
347 {
348 nG_app = k_G - j;
349 n_iter = nG_app / n_fuse;
350 n_left = nG_app % n_fuse;
351
352 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
353 {
354 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
355 g23 = buff_G + (g )*rs_G + (k )*cs_G;
356 a1 = buff_A + (g - 1)*cs_A;
357 a2 = buff_A + (g )*cs_A;
358 a3 = buff_A + (g + 1)*cs_A;
359
360 gamma12 = g12->real;
361 sigma12 = g12->imag;
362 gamma23 = g23->real;
363 sigma23 = g23->imag;
364
365 is_ident12 = ( gamma12 == one && sigma12 == zero );
366 is_ident23 = ( gamma23 == one && sigma23 == zero );
367
368 m_app = m_A;
369
370 if ( !is_ident12 && is_ident23 )
371 {
372 // Apply only to columns 1 and 2.
373
375 &gamma12,
376 &sigma12,
377 a1, 1,
378 a2, 1 );
379 }
380 else if ( is_ident12 && !is_ident23 )
381 {
382 // Apply only to columns 2 and 3.
383
385 &gamma23,
386 &sigma23,
387 a2, 1,
388 a3, 1 );
389 }
390 else if ( !is_ident12 && !is_ident23 )
391 {
392 // Apply to all three columns.
393
395 &gamma12,
396 &sigma12,
397 &gamma23,
398 &sigma23,
399 a1, 1,
400 a2, 1,
401 a3, 1 );
402 }
403 }
404 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
405 if ( n_left == 1 )
406 {
407 g23 = buff_G + (g )*rs_G + (k )*cs_G;
408 a2 = buff_A + (g )*cs_A;
409 a3 = buff_A + (g + 1)*cs_A;
410
411 gamma23 = g23->real;
412 sigma23 = g23->imag;
413
414 is_ident23 = ( gamma23 == one && sigma23 == zero );
415
416 m_app = m_A;
417
418 if ( !is_ident23 )
420 &gamma23,
421 &sigma23,
422 a2, 1,
423 a3, 1 );
424 }
425 }
426
427 return FLA_SUCCESS;
428}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bld_var6b().

◆ FLA_Apply_G_rf_asd_var7()

FLA_Error FLA_Apply_G_rf_asd_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var8()

FLA_Error FLA_Apply_G_rf_asd_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var8b()

FLA_Error FLA_Apply_G_rf_asd_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var9()

FLA_Error FLA_Apply_G_rf_asd_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
385{
386 double one = bl1_d1();
387 double zero = bl1_d0();
388 double gamma12;
389 double sigma12;
390 double gamma23;
391 double sigma23;
392 double* a1;
393 double* a2;
394 double* a3;
395 dcomplex* g12;
396 dcomplex* g23;
397 int i, j, g, k;
398 int nG, nG_app;
399 int n_iter;
400 int n_left;
401 int k_minus_1;
402 int n_fuse;
404
405 k_minus_1 = k_G - 1;
406 nG = n_A - 1;
407 n_fuse = 2;
408
409 // Use the simple variant for nG < 2*(k - 1) or k == 1.
410 if ( nG < 2*k_minus_1 || k_G == 1 )
411 {
412 FLA_Apply_G_rf_asd_var1( k_G,
413 m_A,
414 n_A,
415 buff_G, rs_G, cs_G,
416 buff_A, rs_A, cs_A );
417 return FLA_SUCCESS;
418 }
419
420
421 // Start-up phase.
422
423 for ( j = -1; j < k_minus_1; j += n_fuse )
424 {
425 nG_app = j + 1;
426 n_iter = nG_app;
427 n_left = 1;
428
429 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
430 {
431 g12 = buff_G + (g )*rs_G + (k )*cs_G;
432 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
433 a1 = buff_A + (g )*cs_A;
434 a2 = buff_A + (g + 1)*cs_A;
435 a3 = buff_A + (g + 2)*cs_A;
436
437 gamma12 = g12->real;
438 sigma12 = g12->imag;
439 gamma23 = g23->real;
440 sigma23 = g23->imag;
441
442 is_ident12 = ( gamma12 == one && sigma12 == zero );
443 is_ident23 = ( gamma23 == one && sigma23 == zero );
444
445 if ( !is_ident12 && is_ident23 )
446 {
447 // Apply only to columns 1 and 2.
448
450 &gamma12,
451 &sigma12,
452 a1, 1,
453 a2, 1 );
454 }
455 else if ( is_ident12 && !is_ident23 )
456 {
457 // Apply only to columns 2 and 3.
458
460 &gamma23,
461 &sigma23,
462 a2, 1,
463 a3, 1 );
464 }
465 else if ( !is_ident12 && !is_ident23 )
466 {
467 // Apply to all three columns.
468
470 &gamma12,
471 &sigma12,
472 &gamma23,
473 &sigma23,
474 a1, 1,
475 a2, 1,
476 a3, 1 );
477 }
478 }
479
480 if ( n_left == 1 )
481 {
482 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
483 a2 = buff_A + (g + 1)*cs_A;
484 a3 = buff_A + (g + 2)*cs_A;
485
486 gamma23 = g23->real;
487 sigma23 = g23->imag;
488
489 is_ident23 = ( gamma23 == one && sigma23 == zero );
490
491 if ( !is_ident23 )
493 &gamma23,
494 &sigma23,
495 a2, 1,
496 a3, 1 );
497 }
498 }
499
500 // Pipeline stage
501
502 for ( ; j < nG - 1; j += n_fuse )
503 {
504 nG_app = k_G;
505 n_iter = nG_app;
506 n_left = 0;
507
508 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
509 {
510 g12 = buff_G + (g )*rs_G + (k )*cs_G;
511 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
512 a1 = buff_A + (g )*cs_A;
513 a2 = buff_A + (g + 1)*cs_A;
514 a3 = buff_A + (g + 2)*cs_A;
515
516 gamma12 = g12->real;
517 sigma12 = g12->imag;
518 gamma23 = g23->real;
519 sigma23 = g23->imag;
520
521 is_ident12 = ( gamma12 == one && sigma12 == zero );
522 is_ident23 = ( gamma23 == one && sigma23 == zero );
523
524 if ( !is_ident12 && is_ident23 )
525 {
526 // Apply only to columns 1 and 2.
527
529 &gamma12,
530 &sigma12,
531 a1, 1,
532 a2, 1 );
533 }
534 else if ( is_ident12 && !is_ident23 )
535 {
536 // Apply only to columns 2 and 3.
537
539 &gamma23,
540 &sigma23,
541 a2, 1,
542 a3, 1 );
543 }
544 else if ( !is_ident12 && !is_ident23 )
545 {
546 // Apply to all three columns.
547
549 &gamma12,
550 &sigma12,
551 &gamma23,
552 &sigma23,
553 a1, 1,
554 a2, 1,
555 a3, 1 );
556 }
557 }
558 }
559
560 // Shutdown stage
561
562 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
563 {
564 g = nG - 1;
565 k = j;
566
567 n_left = 1;
568 if ( n_left == 1 )
569 {
570 g12 = buff_G + (g )*rs_G + (k )*cs_G;
571 a1 = buff_A + (g )*cs_A;
572 a2 = buff_A + (g + 1)*cs_A;
573
574 gamma12 = g12->real;
575 sigma12 = g12->imag;
576
577 is_ident12 = ( gamma12 == one && sigma12 == zero );
578
579 if ( !is_ident12 )
581 &gamma12,
582 &sigma12,
583 a1, 1,
584 a2, 1 );
585 ++k;
586 --g;
587 }
588
589 nG_app = k_minus_1 - j;
590 n_iter = nG_app;
591
592 for ( i = 0; i < n_iter; ++i, ++k, --g )
593 {
594 g12 = buff_G + (g )*rs_G + (k )*cs_G;
595 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
596 a1 = buff_A + (g )*cs_A;
597 a2 = buff_A + (g + 1)*cs_A;
598 a3 = buff_A + (g + 2)*cs_A;
599
600 gamma12 = g12->real;
601 sigma12 = g12->imag;
602 gamma23 = g23->real;
603 sigma23 = g23->imag;
604
605 is_ident12 = ( gamma12 == one && sigma12 == zero );
606 is_ident23 = ( gamma23 == one && sigma23 == zero );
607
608 if ( !is_ident12 && is_ident23 )
609 {
610 // Apply only to columns 1 and 2.
611
613 &gamma12,
614 &sigma12,
615 a1, 1,
616 a2, 1 );
617 }
618 else if ( is_ident12 && !is_ident23 )
619 {
620 // Apply only to columns 2 and 3.
621
623 &gamma23,
624 &sigma23,
625 a2, 1,
626 a3, 1 );
627 }
628 else if ( !is_ident12 && !is_ident23 )
629 {
630 // Apply to all three columns.
631
633 &gamma12,
634 &sigma12,
635 &gamma23,
636 &sigma23,
637 a1, 1,
638 a2, 1,
639 a3, 1 );
640 }
641 }
642 }
643
644 return FLA_SUCCESS;
645}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bld_var9().

◆ FLA_Apply_G_rf_asd_var9b()

FLA_Error FLA_Apply_G_rf_asd_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
138{
139 double one = bl1_d1();
140 double zero = bl1_d0();
141 double gamma12;
142 double sigma12;
143 double gamma23;
144 double sigma23;
145 double* a1;
146 double* a2;
147 double* a3;
148 dcomplex* g12;
149 dcomplex* g23;
150 int i, j, g, k;
151 int nG, nG_app;
152 int n_iter;
153 int n_left;
154 int k_minus_1;
155 int n_fuse;
157 int m_app;
158
159
160 k_minus_1 = k_G - 1;
161 nG = n_A - 1;
162 n_fuse = 2;
163
164 // Use the simple variant for nG < 2*(k - 1) or k == 1.
165 if ( nG < 2*k_minus_1 || k_G == 1 )
166 {
167 FLA_Apply_G_rf_asd_var1( k_G,
168 m_A,
169 n_A,
170 buff_G, rs_G, cs_G,
171 buff_A, rs_A, cs_A );
172 return FLA_SUCCESS;
173 }
174
175
176 // Start-up phase.
177
178 for ( j = -1; j < k_minus_1; j += n_fuse )
179 {
180 nG_app = j + 1;
181 n_iter = nG_app;
182 n_left = 1;
183
184 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
185 {
186 g12 = buff_G + (g )*rs_G + (k )*cs_G;
187 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
188 a1 = buff_A + (g )*cs_A;
189 a2 = buff_A + (g + 1)*cs_A;
190 a3 = buff_A + (g + 2)*cs_A;
191
192 gamma12 = g12->real;
193 sigma12 = g12->imag;
194 gamma23 = g23->real;
195 sigma23 = g23->imag;
196
197 is_ident12 = ( gamma12 == one && sigma12 == zero );
198 is_ident23 = ( gamma23 == one && sigma23 == zero );
199
200 m_app = min( i_k + 3 + j - iTL, m_A );
201 m_app = max( m_app, 0 );
202
203 if ( !is_ident12 && is_ident23 )
204 {
205 // Apply only to columns 1 and 2.
206
208 &gamma12,
209 &sigma12,
210 a1, 1,
211 a2, 1 );
212 }
213 else if ( is_ident12 && !is_ident23 )
214 {
215 // Apply only to columns 2 and 3.
216
218 &gamma23,
219 &sigma23,
220 a2, 1,
221 a3, 1 );
222 }
223 else if ( !is_ident12 && !is_ident23 )
224 {
225 // Apply to all three columns.
226
228 &gamma12,
229 &sigma12,
230 &gamma23,
231 &sigma23,
232 a1, 1,
233 a2, 1,
234 a3, 1 );
235 }
236 }
237
238 if ( n_left == 1 )
239 {
240 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
241 a2 = buff_A + (g + 1)*cs_A;
242 a3 = buff_A + (g + 2)*cs_A;
243
244 gamma23 = g23->real;
245 sigma23 = g23->imag;
246
247 is_ident23 = ( gamma23 == one && sigma23 == zero );
248
249 m_app = min( i_k + 3 + j - iTL, m_A );
250 m_app = max( m_app, 0 );
251
252 if ( !is_ident23 )
254 &gamma23,
255 &sigma23,
256 a2, 1,
257 a3, 1 );
258 }
259 }
260
261 // Pipeline stage
262
263 for ( ; j < nG - 1; j += n_fuse )
264 {
265 nG_app = k_G;
266 n_iter = nG_app;
267 n_left = 0;
268
269 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
270 {
271 g12 = buff_G + (g )*rs_G + (k )*cs_G;
272 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
273 a1 = buff_A + (g )*cs_A;
274 a2 = buff_A + (g + 1)*cs_A;
275 a3 = buff_A + (g + 2)*cs_A;
276
277 gamma12 = g12->real;
278 sigma12 = g12->imag;
279 gamma23 = g23->real;
280 sigma23 = g23->imag;
281
282 is_ident12 = ( gamma12 == one && sigma12 == zero );
283 is_ident23 = ( gamma23 == one && sigma23 == zero );
284
285 m_app = min( i_k + 3 + j - iTL, m_A );
286 m_app = max( m_app, 0 );
287
288 if ( !is_ident12 && is_ident23 )
289 {
290 // Apply only to columns 1 and 2.
291
293 &gamma12,
294 &sigma12,
295 a1, 1,
296 a2, 1 );
297 }
298 else if ( is_ident12 && !is_ident23 )
299 {
300 // Apply only to columns 2 and 3.
301
303 &gamma23,
304 &sigma23,
305 a2, 1,
306 a3, 1 );
307 }
308 else if ( !is_ident12 && !is_ident23 )
309 {
310 // Apply to all three columns.
311
313 &gamma12,
314 &sigma12,
315 &gamma23,
316 &sigma23,
317 a1, 1,
318 a2, 1,
319 a3, 1 );
320 }
321 }
322 }
323
324 // Shutdown stage
325
326 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
327 {
328 g = nG - 1;
329 k = j;
330
331 n_left = 1;
332 if ( n_left == 1 )
333 {
334 g12 = buff_G + (g )*rs_G + (k )*cs_G;
335 a1 = buff_A + (g )*cs_A;
336 a2 = buff_A + (g + 1)*cs_A;
337
338 gamma12 = g12->real;
339 sigma12 = g12->imag;
340
341 is_ident12 = ( gamma12 == one && sigma12 == zero );
342
343 m_app = m_A;
344
345 if ( !is_ident12 )
347 &gamma12,
348 &sigma12,
349 a1, 1,
350 a2, 1 );
351 ++k;
352 --g;
353 }
354
355 nG_app = k_minus_1 - j;
356 n_iter = nG_app;
357
358 for ( i = 0; i < n_iter; ++i, ++k, --g )
359 {
360 g12 = buff_G + (g )*rs_G + (k )*cs_G;
361 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362 a1 = buff_A + (g )*cs_A;
363 a2 = buff_A + (g + 1)*cs_A;
364 a3 = buff_A + (g + 2)*cs_A;
365
366 gamma12 = g12->real;
367 sigma12 = g12->imag;
368 gamma23 = g23->real;
369 sigma23 = g23->imag;
370
371 is_ident12 = ( gamma12 == one && sigma12 == zero );
372 is_ident23 = ( gamma23 == one && sigma23 == zero );
373
374 m_app = m_A;
375
376 if ( !is_ident12 && is_ident23 )
377 {
378 // Apply only to columns 1 and 2.
379
381 &gamma12,
382 &sigma12,
383 a1, 1,
384 a2, 1 );
385 }
386 else if ( is_ident12 && !is_ident23 )
387 {
388 // Apply only to columns 2 and 3.
389
391 &gamma23,
392 &sigma23,
393 a2, 1,
394 a3, 1 );
395 }
396 else if ( !is_ident12 && !is_ident23 )
397 {
398 // Apply to all three columns.
399
401 &gamma12,
402 &sigma12,
403 &gamma23,
404 &sigma23,
405 a1, 1,
406 a2, 1,
407 a3, 1 );
408 }
409 }
410 }
411
412 return FLA_SUCCESS;
413}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bld_var9b().

◆ FLA_Apply_G_rf_asm_var1()

FLA_Error FLA_Apply_G_rf_asm_var1 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_ass_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:113
FLA_Error FLA_Apply_G_rf_asz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:267
FLA_Error FLA_Apply_G_rf_asc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:215
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:164
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49

References FLA_Apply_G_rf_asc_var1(), FLA_Apply_G_rf_asd_var1(), FLA_Apply_G_rf_ass_var1(), FLA_Apply_G_rf_asz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_asm_var2()

FLA_Error FLA_Apply_G_rf_asm_var2 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_asc_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:339
FLA_Error FLA_Apply_G_rf_asd_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:226
FLA_Error FLA_Apply_G_rf_ass_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:113
FLA_Error FLA_Apply_G_rf_asz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:452

References FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_asz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_asm_var3()

FLA_Error FLA_Apply_G_rf_asm_var3 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_ass_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:113
FLA_Error FLA_Apply_G_rf_asz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:1473
FLA_Error FLA_Apply_G_rf_asd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:566
FLA_Error FLA_Apply_G_rf_asc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:1019

References FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_asz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_asm_var3b()

FLA_Error FLA_Apply_G_rf_asm_var3b ( FLA_Obj  G,
FLA_Obj  A 
)
29{
30 FLA_Datatype datatype;
31 int k_G, m_A, n_A;
32 int rs_G, cs_G;
33 int rs_A, cs_A;
34
35 datatype = FLA_Obj_datatype( A );
36
37 k_G = FLA_Obj_width( G );
38 m_A = FLA_Obj_length( A );
39 n_A = FLA_Obj_width( A );
40
43
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
52 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53
55 m_A,
56 n_A,
57 0,
58 0,
60 buff_A, rs_A, cs_A );
61
62 break;
63 }
64
65 case FLA_DOUBLE:
66 {
68 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69
71 m_A,
72 n_A,
73 0,
74 0,
76 buff_A, rs_A, cs_A );
77
78 break;
79 }
80
81 case FLA_COMPLEX:
82 {
85
87 m_A,
88 n_A,
89 0,
90 0,
92 buff_A, rs_A, cs_A );
93
94 break;
95 }
96
98 {
101
103 m_A,
104 n_A,
105 0,
106 0,
107 buff_G, rs_G, cs_G,
108 buff_A, rs_A, cs_A );
109
110 break;
111 }
112 }
113
114 return FLA_SUCCESS;
115}
FLA_Error FLA_Apply_G_rf_asd_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:131
FLA_Error FLA_Apply_G_rf_asz_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:620
FLA_Error FLA_Apply_G_rf_asc_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:607
FLA_Error FLA_Apply_G_rf_ass_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:118

References FLA_Apply_G_rf_asc_var3b(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_ass_var3b(), FLA_Apply_G_rf_asz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_asm_var4()

FLA_Error FLA_Apply_G_rf_asm_var4 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var5()

FLA_Error FLA_Apply_G_rf_asm_var5 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var5b()

FLA_Error FLA_Apply_G_rf_asm_var5b ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var6()

FLA_Error FLA_Apply_G_rf_asm_var6 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_asc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:675
FLA_Error FLA_Apply_G_rf_asd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:394
FLA_Error FLA_Apply_G_rf_asz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:956
FLA_Error FLA_Apply_G_rf_ass_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:113

References FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_asz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_asm_var6b()

FLA_Error FLA_Apply_G_rf_asm_var6b ( FLA_Obj  G,
FLA_Obj  A 
)
29{
30 FLA_Datatype datatype;
31 int k_G, m_A, n_A;
32 int rs_G, cs_G;
33 int rs_A, cs_A;
34
35 datatype = FLA_Obj_datatype( A );
36
37 k_G = FLA_Obj_width( G );
38 m_A = FLA_Obj_length( A );
39 n_A = FLA_Obj_width( A );
40
43
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
52 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53
55 m_A,
56 n_A,
57 0,
58 0,
60 buff_A, rs_A, cs_A );
61
62 break;
63 }
64
65 case FLA_DOUBLE:
66 {
68 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69
71 m_A,
72 n_A,
73 0,
74 0,
76 buff_A, rs_A, cs_A );
77
78 break;
79 }
80
81 case FLA_COMPLEX:
82 {
85
87 m_A,
88 n_A,
89 0,
90 0,
92 buff_A, rs_A, cs_A );
93
94 break;
95 }
96
98 {
101
103 m_A,
104 n_A,
105 0,
106 0,
107 buff_G, rs_G, cs_G,
108 buff_A, rs_A, cs_A );
109
110 break;
111 }
112 }
113
114 return FLA_SUCCESS;
115}
FLA_Error FLA_Apply_G_rf_asd_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6b.c:131
FLA_Error FLA_Apply_G_rf_ass_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6b.c:118
FLA_Error FLA_Apply_G_rf_asc_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6b.c:430
FLA_Error FLA_Apply_G_rf_asz_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6b.c:443

References FLA_Apply_G_rf_asc_var6b(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_ass_var6b(), FLA_Apply_G_rf_asz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_asm_var7()

FLA_Error FLA_Apply_G_rf_asm_var7 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var8()

FLA_Error FLA_Apply_G_rf_asm_var8 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var8b()

FLA_Error FLA_Apply_G_rf_asm_var8b ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var9()

FLA_Error FLA_Apply_G_rf_asm_var9 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_asc_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:647
FLA_Error FLA_Apply_G_rf_asd_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:380
FLA_Error FLA_Apply_G_rf_asz_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:914
FLA_Error FLA_Apply_G_rf_ass_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:113

References FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_ass_var9(), FLA_Apply_G_rf_asz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_asm_var9b()

FLA_Error FLA_Apply_G_rf_asm_var9b ( FLA_Obj  G,
FLA_Obj  A 
)
29{
30 FLA_Datatype datatype;
31 int k_G, m_A, n_A;
32 int rs_G, cs_G;
33 int rs_A, cs_A;
34
35 datatype = FLA_Obj_datatype( A );
36
37 k_G = FLA_Obj_width( G );
38 m_A = FLA_Obj_length( A );
39 n_A = FLA_Obj_width( A );
40
43
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
52 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53
55 m_A,
56 n_A,
57 0,
58 0,
60 buff_A, rs_A, cs_A );
61
62 break;
63 }
64
65 case FLA_DOUBLE:
66 {
68 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69
71 m_A,
72 n_A,
73 0,
74 0,
76 buff_A, rs_A, cs_A );
77
78 break;
79 }
80
81 case FLA_COMPLEX:
82 {
85
87 m_A,
88 n_A,
89 0,
90 0,
92 buff_A, rs_A, cs_A );
93
94 break;
95 }
96
98 {
101
103 m_A,
104 n_A,
105 0,
106 0,
107 buff_G, rs_G, cs_G,
108 buff_A, rs_A, cs_A );
109
110 break;
111 }
112 }
113
114 return FLA_SUCCESS;
115}
FLA_Error FLA_Apply_G_rf_asc_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:415
FLA_Error FLA_Apply_G_rf_ass_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:118
FLA_Error FLA_Apply_G_rf_asd_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:131
FLA_Error FLA_Apply_G_rf_asz_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:428

References FLA_Apply_G_rf_asc_var9b(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_ass_var9b(), FLA_Apply_G_rf_asz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_ass_var1()

FLA_Error FLA_Apply_G_rf_ass_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 int nG_app = n_A - 1;
122 int l, j;
123 float gamma;
124 float sigma;
125 float* a1;
126 float* a2;
127 scomplex* g1;
128 scomplex* g11;
129
130 g1 = buff_G;
131
132 for ( l = 0; l < k_G; ++l )
133 {
134 a1 = buff_A;
135 a2 = buff_A + cs_A;
136 g11 = g1;
137
138 for ( j = 0; j < nG_app; ++j )
139 {
140 gamma = g11->real;
141 sigma = g11->imag;
142
143 // Skip the current iteration if the rotation is identity.
144 if ( gamma != one || sigma != zero )
145 {
147 &gamma,
148 &sigma,
149 a1, 1,
150 a2, 1 );
151 }
152
153 a1 += cs_A;
154 a2 += cs_A;
155 g11 += rs_G;
156 }
157
158 g1 += cs_G;
159 }
160
161 return FLA_SUCCESS;
162}

References bl1_s0(), bl1_s1(), i, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_ass_var9(), and FLA_Apply_G_rf_bls_var1().

◆ FLA_Apply_G_rf_ass_var2()

FLA_Error FLA_Apply_G_rf_ass_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 float gamma;
122 float sigma;
123 float* a1;
124 float* a2;
125 scomplex* g11;
126 int j, g, k;
127 int nG, nG_app;
128 int k_minus_1;
129
130 k_minus_1 = k_G - 1;
131 nG = n_A - 1;
132
133 // Use the simple variant for nG < (k - 1) or k == 1.
134 if ( nG < k_minus_1 || k_G == 1 )
135 {
137 m_A,
138 n_A,
139 buff_G, rs_G, cs_G,
140 buff_A, rs_A, cs_A );
141 return FLA_SUCCESS;
142 }
143
144
145 // Start-up phase.
146
147 for ( j = 0; j < k_minus_1; ++j )
148 {
149 nG_app = j + 1;
150
151 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
152 {
153 g11 = buff_G + (g )*rs_G + (k )*cs_G;
154 a1 = buff_A + (g )*cs_A;
155 a2 = buff_A + (g + 1)*cs_A;
156
157 gamma = g11->real;
158 sigma = g11->imag;
159
160 // Skip the current iteration if the rotation is identity.
161 if ( gamma == one && sigma == zero ) continue;
162
164 &gamma,
165 &sigma,
166 a1, 1,
167 a2, 1 );
168 }
169 }
170
171 // Pipeline stage
172
173 for ( j = k_minus_1; j < nG; ++j )
174 {
175 nG_app = k_G;
176
177 for ( k = 0, g = j; k < nG_app; ++k, --g )
178 {
179 g11 = buff_G + (g )*rs_G + (k )*cs_G;
180 a1 = buff_A + (g )*cs_A;
181 a2 = buff_A + (g + 1)*cs_A;
182
183 gamma = g11->real;
184 sigma = g11->imag;
185
186 // Skip the current iteration if the rotation is identity.
187 if ( gamma == one && sigma == zero ) continue;
188
190 &gamma,
191 &sigma,
192 a1, 1,
193 a2, 1 );
194 }
195 }
196
197 // Shutdown stage
198
199 for ( j = nG - k_minus_1; j < nG; ++j )
200 {
201 nG_app = nG - j;
202
203 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
204 {
205 g11 = buff_G + (g )*rs_G + (k )*cs_G;
206 a1 = buff_A + (g )*cs_A;
207 a2 = buff_A + (g + 1)*cs_A;
208
209 gamma = g11->real;
210 sigma = g11->imag;
211
212 // Skip the current iteration if the rotation is identity.
213 if ( gamma == one && sigma == zero ) continue;
214
216 &gamma,
217 &sigma,
218 a1, 1,
219 a2, 1 );
220 }
221 }
222
223 return FLA_SUCCESS;
224}
FLA_Error FLA_Apply_G_rf_ass_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), and i.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bls_var2().

◆ FLA_Apply_G_rf_ass_var3()

FLA_Error FLA_Apply_G_rf_ass_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 float gamma23_k1;
122 float sigma23_k1;
123 float gamma34_k1;
124 float sigma34_k1;
125 float gamma12_k2;
126 float sigma12_k2;
127 float gamma23_k2;
128 float sigma23_k2;
129 float* a1;
130 float* a2;
131 float* a3;
132 float* a4;
137 int i, j, g, k;
138 int nG, nG_app;
139 int n_iter;
140 int n_left;
141 int k_minus_1;
142 int n_fuse;
143 int k_fuse;
146 int has_ident;
147
148 k_minus_1 = k_G - 1;
149 nG = n_A - 1;
150 n_fuse = 2;
151 k_fuse = 2;
152
153 // Use the simple variant for nG < 2(k - 1) or k == 1.
154 if ( nG < 2*k_minus_1 || k_G == 1 )
155 {
157 m_A,
158 n_A,
159 buff_G, rs_G, cs_G,
160 buff_A, rs_A, cs_A );
161 return FLA_SUCCESS;
162 }
163
164
165 // Start-up phase.
166
167 for ( j = -1; j < k_minus_1; j += n_fuse )
168 {
169 nG_app = j + 2;
170 n_iter = nG_app / k_fuse;
171 n_left = 1;
172
173 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
174 {
175 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
176 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
177 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
178 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
179 a1 = buff_A + (g - 1)*cs_A;
180 a2 = buff_A + (g )*cs_A;
181 a3 = buff_A + (g + 1)*cs_A;
182 a4 = buff_A + (g + 2)*cs_A;
183
184 gamma23_k1 = g23_k1->real;
185 sigma23_k1 = g23_k1->imag;
186 gamma34_k1 = g34_k1->real;
187 sigma34_k1 = g34_k1->imag;
188 gamma12_k2 = g12_k2->real;
189 sigma12_k2 = g12_k2->imag;
190 gamma23_k2 = g23_k2->real;
191 sigma23_k2 = g23_k2->imag;
192
199
200 if ( has_ident )
201 {
202 // Apply to pairs of columns as needed.
203
204 if ( !is_ident23_k1 )
206 &gamma23_k1,
207 &sigma23_k1,
208 a2, 1,
209 a3, 1 );
210
211 if ( !is_ident34_k1 )
213 &gamma34_k1,
214 &sigma34_k1,
215 a3, 1,
216 a4, 1 );
217
218 if ( !is_ident12_k2 )
220 &gamma12_k2,
221 &sigma12_k2,
222 a1, 1,
223 a2, 1 );
224
225 if ( !is_ident23_k2 )
227 &gamma23_k2,
228 &sigma23_k2,
229 a2, 1,
230 a3, 1 );
231 }
232 else
233 {
234 // Apply to all four columns.
235
237 &gamma23_k1,
238 &sigma23_k1,
239 &gamma34_k1,
240 &sigma34_k1,
241 &gamma12_k2,
242 &sigma12_k2,
243 &gamma23_k2,
244 &sigma23_k2,
245 a1, 1,
246 a2, 1,
247 a3, 1,
248 a4, 1 );
249 }
250 }
251
252 if ( n_left == 1 )
253 {
254 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
255 a3 = buff_A + (g + 1)*cs_A;
256 a4 = buff_A + (g + 2)*cs_A;
257
258 gamma34_k1 = g34_k1->real;
259 sigma34_k1 = g34_k1->imag;
260
262
263 if ( !is_ident34_k1 )
265 &gamma34_k1,
266 &sigma34_k1,
267 a3, 1,
268 a4, 1 );
269 }
270 }
271
272 // Pipeline stage
273
274 for ( ; j < nG - 1; j += n_fuse )
275 {
276 nG_app = k_G;
277 n_iter = nG_app / k_fuse;
278 n_left = nG_app % k_fuse;
279
280 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
281 {
282 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
283 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
284 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
285 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
286 a1 = buff_A + (g - 1)*cs_A;
287 a2 = buff_A + (g )*cs_A;
288 a3 = buff_A + (g + 1)*cs_A;
289 a4 = buff_A + (g + 2)*cs_A;
290
291 gamma23_k1 = g23_k1->real;
292 sigma23_k1 = g23_k1->imag;
293 gamma34_k1 = g34_k1->real;
294 sigma34_k1 = g34_k1->imag;
295 gamma12_k2 = g12_k2->real;
296 sigma12_k2 = g12_k2->imag;
297 gamma23_k2 = g23_k2->real;
298 sigma23_k2 = g23_k2->imag;
299
306
307 if ( has_ident )
308 {
309 // Apply to pairs of columns as needed.
310
311 if ( !is_ident23_k1 )
313 &gamma23_k1,
314 &sigma23_k1,
315 a2, 1,
316 a3, 1 );
317
318 if ( !is_ident34_k1 )
320 &gamma34_k1,
321 &sigma34_k1,
322 a3, 1,
323 a4, 1 );
324
325 if ( !is_ident12_k2 )
327 &gamma12_k2,
328 &sigma12_k2,
329 a1, 1,
330 a2, 1 );
331
332 if ( !is_ident23_k2 )
334 &gamma23_k2,
335 &sigma23_k2,
336 a2, 1,
337 a3, 1 );
338 }
339 else
340 {
341 // Apply to all four columns.
342
344 &gamma23_k1,
345 &sigma23_k1,
346 &gamma34_k1,
347 &sigma34_k1,
348 &gamma12_k2,
349 &sigma12_k2,
350 &gamma23_k2,
351 &sigma23_k2,
352 a1, 1,
353 a2, 1,
354 a3, 1,
355 a4, 1 );
356 }
357 }
358
359 if ( n_left == 1 )
360 {
361 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
362 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
363 a2 = buff_A + (g )*cs_A;
364 a3 = buff_A + (g + 1)*cs_A;
365 a4 = buff_A + (g + 2)*cs_A;
366
367 gamma23_k1 = g23_k1->real;
368 sigma23_k1 = g23_k1->imag;
369 gamma34_k1 = g34_k1->real;
370 sigma34_k1 = g34_k1->imag;
371
374
376 {
378 &gamma23_k1,
379 &sigma23_k1,
380 a2, 1,
381 a3, 1 );
382 }
383 else if ( is_ident23_k1 && !is_ident34_k1 )
384 {
386 &gamma34_k1,
387 &sigma34_k1,
388 a3, 1,
389 a4, 1 );
390 }
391 else
392 {
394 &gamma23_k1,
395 &sigma23_k1,
396 &gamma34_k1,
397 &sigma34_k1,
398 a2, 1,
399 a3, 1,
400 a4, 1 );
401 }
402 }
403 }
404
405 // Shutdown stage
406
407 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
408 {
409 g = nG - 1;
410 k = j;
411
412 //n_left = 1;
413 //if ( n_left == 1 )
414 {
415 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
416 a2 = buff_A + (g )*cs_A;
417 a3 = buff_A + (g + 1)*cs_A;
418
419 gamma23_k1 = g23_k1->real;
420 sigma23_k1 = g23_k1->imag;
421
423
424 if ( !is_ident23_k1 )
426 &gamma23_k1,
427 &sigma23_k1,
428 a2, 1,
429 a3, 1 );
430 ++k;
431 --g;
432 }
433
434 nG_app = k_minus_1 - j;
435 n_iter = nG_app / k_fuse;
436 n_left = nG_app % k_fuse;
437
438 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
439 {
440 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
441 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
442 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
443 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
444 a1 = buff_A + (g - 1)*cs_A;
445 a2 = buff_A + (g )*cs_A;
446 a3 = buff_A + (g + 1)*cs_A;
447 a4 = buff_A + (g + 2)*cs_A;
448
449 gamma23_k1 = g23_k1->real;
450 sigma23_k1 = g23_k1->imag;
451 gamma34_k1 = g34_k1->real;
452 sigma34_k1 = g34_k1->imag;
453 gamma12_k2 = g12_k2->real;
454 sigma12_k2 = g12_k2->imag;
455 gamma23_k2 = g23_k2->real;
456 sigma23_k2 = g23_k2->imag;
457
464
465 if ( has_ident )
466 {
467 // Apply to pairs of columns as needed.
468
469 if ( !is_ident23_k1 )
471 &gamma23_k1,
472 &sigma23_k1,
473 a2, 1,
474 a3, 1 );
475
476 if ( !is_ident34_k1 )
478 &gamma34_k1,
479 &sigma34_k1,
480 a3, 1,
481 a4, 1 );
482
483 if ( !is_ident12_k2 )
485 &gamma12_k2,
486 &sigma12_k2,
487 a1, 1,
488 a2, 1 );
489
490 if ( !is_ident23_k2 )
492 &gamma23_k2,
493 &sigma23_k2,
494 a2, 1,
495 a3, 1 );
496 }
497 else
498 {
499 // Apply to all four columns.
500
502 &gamma23_k1,
503 &sigma23_k1,
504 &gamma34_k1,
505 &sigma34_k1,
506 &gamma12_k2,
507 &sigma12_k2,
508 &gamma23_k2,
509 &sigma23_k2,
510 a1, 1,
511 a2, 1,
512 a3, 1,
513 a4, 1 );
514 }
515 }
516
517 if ( n_left == 1 )
518 {
519 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
520 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
521 a2 = buff_A + (g )*cs_A;
522 a3 = buff_A + (g + 1)*cs_A;
523 a4 = buff_A + (g + 2)*cs_A;
524
525 gamma23_k1 = g23_k1->real;
526 sigma23_k1 = g23_k1->imag;
527 gamma34_k1 = g34_k1->real;
528 sigma34_k1 = g34_k1->imag;
529
532
534 {
536 &gamma23_k1,
537 &sigma23_k1,
538 a2, 1,
539 a3, 1 );
540 }
541 else if ( is_ident23_k1 && !is_ident34_k1 )
542 {
544 &gamma34_k1,
545 &sigma34_k1,
546 a3, 1,
547 a4, 1 );
548 }
549 else
550 {
552 &gamma23_k1,
553 &sigma23_k1,
554 &gamma34_k1,
555 &sigma34_k1,
556 a2, 1,
557 a3, 1,
558 a4, 1 );
559 }
560 }
561 }
562
563 return FLA_SUCCESS;
564}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_ass_var3b()

FLA_Error FLA_Apply_G_rf_ass_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
125{
127
128 return FLA_SUCCESS;
129}

References i.

Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bls_var3b().

◆ FLA_Apply_G_rf_ass_var4()

FLA_Error FLA_Apply_G_rf_ass_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var5()

FLA_Error FLA_Apply_G_rf_ass_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var5b()

FLA_Error FLA_Apply_G_rf_ass_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var6()

FLA_Error FLA_Apply_G_rf_ass_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 float gamma12;
122 float sigma12;
123 float gamma23;
124 float sigma23;
125 float* a1;
126 float* a2;
127 float* a3;
128 scomplex* g12;
129 scomplex* g23;
130 int i, j, g, k;
131 int nG, nG_app;
132 int n_iter;
133 int n_left;
134 int k_minus_1;
135 int n_fuse;
137
138 k_minus_1 = k_G - 1;
139 nG = n_A - 1;
140 n_fuse = 2;
141
142 // Use the simple variant for nG < (k - 1) or k == 1.
143 if ( nG < k_minus_1 || k_G == 1 )
144 {
146 m_A,
147 n_A,
148 buff_G, rs_G, cs_G,
149 buff_A, rs_A, cs_A );
150 return FLA_SUCCESS;
151 }
152
153
154 // Start-up phase.
155
156 for ( j = 0; j < k_minus_1; ++j )
157 {
158 nG_app = j + 1;
159 n_iter = nG_app / n_fuse;
160 n_left = nG_app % n_fuse;
161
162 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
163 {
164 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
165 g23 = buff_G + (g )*rs_G + (k )*cs_G;
166 a1 = buff_A + (g - 1)*cs_A;
167 a2 = buff_A + (g )*cs_A;
168 a3 = buff_A + (g + 1)*cs_A;
169
170 gamma12 = g12->real;
171 sigma12 = g12->imag;
172 gamma23 = g23->real;
173 sigma23 = g23->imag;
174
175 is_ident12 = ( gamma12 == one && sigma12 == zero );
176 is_ident23 = ( gamma23 == one && sigma23 == zero );
177
178 if ( !is_ident12 && is_ident23 )
179 {
180 // Apply only to columns 1 and 2.
181
183 &gamma12,
184 &sigma12,
185 a1, 1,
186 a2, 1 );
187 }
188 else if ( is_ident12 && !is_ident23 )
189 {
190 // Apply only to columns 2 and 3.
191
193 &gamma23,
194 &sigma23,
195 a2, 1,
196 a3, 1 );
197 }
198 else if ( !is_ident12 && !is_ident23 )
199 {
200 // Apply to all three columns.
201
203 &gamma12,
204 &sigma12,
205 &gamma23,
206 &sigma23,
207 a1, 1,
208 a2, 1,
209 a3, 1 );
210 }
211 }
212
213 if ( n_left == 1 )
214 {
215 g23 = buff_G + (g )*rs_G + (k )*cs_G;
216 a2 = buff_A + (g )*cs_A;
217 a3 = buff_A + (g + 1)*cs_A;
218
219 gamma23 = g23->real;
220 sigma23 = g23->imag;
221
222 is_ident23 = ( gamma23 == one && sigma23 == zero );
223
224 if ( !is_ident23 )
226 &gamma23,
227 &sigma23,
228 a2, 1,
229 a3, 1 );
230 }
231 }
232
233 // Pipeline stage
234
235 for ( j = k_minus_1; j < nG; ++j )
236 {
237 nG_app = k_G;
238 n_iter = nG_app / n_fuse;
239 n_left = nG_app % n_fuse;
240
241 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
242 {
243 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
244 g23 = buff_G + (g )*rs_G + (k )*cs_G;
245 a1 = buff_A + (g - 1)*cs_A;
246 a2 = buff_A + (g )*cs_A;
247 a3 = buff_A + (g + 1)*cs_A;
248
249 gamma12 = g12->real;
250 sigma12 = g12->imag;
251 gamma23 = g23->real;
252 sigma23 = g23->imag;
253
254 is_ident12 = ( gamma12 == one && sigma12 == zero );
255 is_ident23 = ( gamma23 == one && sigma23 == zero );
256
257 if ( !is_ident12 && is_ident23 )
258 {
259 // Apply only to columns 1 and 2.
260
262 &gamma12,
263 &sigma12,
264 a1, 1,
265 a2, 1 );
266 }
267 else if ( is_ident12 && !is_ident23 )
268 {
269 // Apply only to columns 2 and 3.
270
272 &gamma23,
273 &sigma23,
274 a2, 1,
275 a3, 1 );
276 }
277 else if ( !is_ident12 && !is_ident23 )
278 {
279 // Apply to all three columns.
280
282 &gamma12,
283 &sigma12,
284 &gamma23,
285 &sigma23,
286 a1, 1,
287 a2, 1,
288 a3, 1 );
289 }
290 }
291
292 if ( n_left == 1 )
293 {
294 g23 = buff_G + (g )*rs_G + (k )*cs_G;
295 a2 = buff_A + (g )*cs_A;
296 a3 = buff_A + (g + 1)*cs_A;
297
298 gamma23 = g23->real;
299 sigma23 = g23->imag;
300
301 is_ident23 = ( gamma23 == one && sigma23 == zero );
302
303 if ( !is_ident23 )
305 &gamma23,
306 &sigma23,
307 a2, 1,
308 a3, 1 );
309 }
310 }
311
312 // Shutdown stage
313
314 for ( j = 1; j < k_G; ++j )
315 {
316 nG_app = k_G - j;
317 n_iter = nG_app / n_fuse;
318 n_left = nG_app % n_fuse;
319
320 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
321 {
322 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
323 g23 = buff_G + (g )*rs_G + (k )*cs_G;
324 a1 = buff_A + (g - 1)*cs_A;
325 a2 = buff_A + (g )*cs_A;
326 a3 = buff_A + (g + 1)*cs_A;
327
328 gamma12 = g12->real;
329 sigma12 = g12->imag;
330 gamma23 = g23->real;
331 sigma23 = g23->imag;
332
333 is_ident12 = ( gamma12 == one && sigma12 == zero );
334 is_ident23 = ( gamma23 == one && sigma23 == zero );
335
336 if ( !is_ident12 && is_ident23 )
337 {
338 // Apply only to columns 1 and 2.
339
341 &gamma12,
342 &sigma12,
343 a1, 1,
344 a2, 1 );
345 }
346 else if ( is_ident12 && !is_ident23 )
347 {
348 // Apply only to columns 2 and 3.
349
351 &gamma23,
352 &sigma23,
353 a2, 1,
354 a3, 1 );
355 }
356 else if ( !is_ident12 && !is_ident23 )
357 {
358 // Apply to all three columns.
359
361 &gamma12,
362 &sigma12,
363 &gamma23,
364 &sigma23,
365 a1, 1,
366 a2, 1,
367 a3, 1 );
368 }
369 }
370
371 if ( n_left == 1 )
372 {
373 g23 = buff_G + (g )*rs_G + (k )*cs_G;
374 a2 = buff_A + (g )*cs_A;
375 a3 = buff_A + (g + 1)*cs_A;
376
377 gamma23 = g23->real;
378 sigma23 = g23->imag;
379
380 is_ident23 = ( gamma23 == one && sigma23 == zero );
381
382 if ( !is_ident23 )
384 &gamma23,
385 &sigma23,
386 a2, 1,
387 a3, 1 );
388 }
389 }
390
391 return FLA_SUCCESS;
392}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bls_var6().

◆ FLA_Apply_G_rf_ass_var6b()

FLA_Error FLA_Apply_G_rf_ass_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
125{
127
128 return FLA_SUCCESS;
129}

References i.

Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bls_var6b().

◆ FLA_Apply_G_rf_ass_var7()

FLA_Error FLA_Apply_G_rf_ass_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var8()

FLA_Error FLA_Apply_G_rf_ass_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var8b()

FLA_Error FLA_Apply_G_rf_ass_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var9()

FLA_Error FLA_Apply_G_rf_ass_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 float gamma12;
122 float sigma12;
123 float gamma23;
124 float sigma23;
125 float* a1;
126 float* a2;
127 float* a3;
128 scomplex* g12;
129 scomplex* g23;
130 int i, j, g, k;
131 int nG, nG_app;
132 int n_iter;
133 int n_left;
134 int k_minus_1;
135 int n_fuse;
137
138 k_minus_1 = k_G - 1;
139 nG = n_A - 1;
140 n_fuse = 2;
141
142 // Use the simple variant for nG < 2(k - 1) or k == 1.
143 if ( nG < 2*k_minus_1 || k_G == 1 )
144 {
146 m_A,
147 n_A,
148 buff_G, rs_G, cs_G,
149 buff_A, rs_A, cs_A );
150 return FLA_SUCCESS;
151 }
152
153
154 // Start-up phase.
155
156 for ( j = -1; j < k_minus_1; j += n_fuse )
157 {
158 nG_app = j + 1;
159 n_iter = nG_app;
160 n_left = 1;
161
162 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
163 {
164 g12 = buff_G + (g )*rs_G + (k )*cs_G;
165 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
166 a1 = buff_A + (g )*cs_A;
167 a2 = buff_A + (g + 1)*cs_A;
168 a3 = buff_A + (g + 2)*cs_A;
169
170 gamma12 = g12->real;
171 sigma12 = g12->imag;
172 gamma23 = g23->real;
173 sigma23 = g23->imag;
174
175 is_ident12 = ( gamma12 == one && sigma12 == zero );
176 is_ident23 = ( gamma23 == one && sigma23 == zero );
177
178 if ( !is_ident12 && is_ident23 )
179 {
180 // Apply only to columns 1 and 2.
181
183 &gamma12,
184 &sigma12,
185 a1, 1,
186 a2, 1 );
187 }
188 else if ( is_ident12 && !is_ident23 )
189 {
190 // Apply only to columns 2 and 3.
191
193 &gamma23,
194 &sigma23,
195 a2, 1,
196 a3, 1 );
197 }
198 else if ( !is_ident12 && !is_ident23 )
199 {
200 // Apply to all three columns.
201
203 &gamma12,
204 &sigma12,
205 &gamma23,
206 &sigma23,
207 a1, 1,
208 a2, 1,
209 a3, 1 );
210 }
211 }
212
213 if ( n_left == 1 )
214 {
215 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
216 a2 = buff_A + (g + 1)*cs_A;
217 a3 = buff_A + (g + 2)*cs_A;
218
219 gamma23 = g23->real;
220 sigma23 = g23->imag;
221
222 is_ident23 = ( gamma23 == one && sigma23 == zero );
223
224 if ( !is_ident23 )
226 &gamma23,
227 &sigma23,
228 a2, 1,
229 a3, 1 );
230 }
231 }
232
233 // Pipeline stage
234
235 for ( ; j < nG - 1; j += n_fuse )
236 {
237 nG_app = k_G;
238 n_iter = nG_app;
239 n_left = 0;
240
241 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
242 {
243 g12 = buff_G + (g )*rs_G + (k )*cs_G;
244 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
245 a1 = buff_A + (g )*cs_A;
246 a2 = buff_A + (g + 1)*cs_A;
247 a3 = buff_A + (g + 2)*cs_A;
248
249 gamma12 = g12->real;
250 sigma12 = g12->imag;
251 gamma23 = g23->real;
252 sigma23 = g23->imag;
253
254 is_ident12 = ( gamma12 == one && sigma12 == zero );
255 is_ident23 = ( gamma23 == one && sigma23 == zero );
256
257 if ( !is_ident12 && is_ident23 )
258 {
259 // Apply only to columns 1 and 2.
260
262 &gamma12,
263 &sigma12,
264 a1, 1,
265 a2, 1 );
266 }
267 else if ( is_ident12 && !is_ident23 )
268 {
269 // Apply only to columns 2 and 3.
270
272 &gamma23,
273 &sigma23,
274 a2, 1,
275 a3, 1 );
276 }
277 else if ( !is_ident12 && !is_ident23 )
278 {
279 // Apply to all three columns.
280
282 &gamma12,
283 &sigma12,
284 &gamma23,
285 &sigma23,
286 a1, 1,
287 a2, 1,
288 a3, 1 );
289 }
290 }
291 }
292
293 // Shutdown stage
294
295 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
296 {
297 g = nG - 1;
298 k = j;
299
300 n_left = 1;
301 if ( n_left == 1 )
302 {
303 g12 = buff_G + (g )*rs_G + (k )*cs_G;
304 a1 = buff_A + (g )*cs_A;
305 a2 = buff_A + (g + 1)*cs_A;
306
307 gamma12 = g12->real;
308 sigma12 = g12->imag;
309
310 is_ident12 = ( gamma12 == one && sigma12 == zero );
311
312 if ( !is_ident12 )
314 &gamma12,
315 &sigma12,
316 a1, 1,
317 a2, 1 );
318 ++k;
319 --g;
320 }
321
322 nG_app = k_minus_1 - j;
323 n_iter = nG_app;
324
325 for ( i = 0; i < n_iter; ++i, ++k, --g )
326 {
327 g12 = buff_G + (g )*rs_G + (k )*cs_G;
328 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
329 a1 = buff_A + (g )*cs_A;
330 a2 = buff_A + (g + 1)*cs_A;
331 a3 = buff_A + (g + 2)*cs_A;
332
333 gamma12 = g12->real;
334 sigma12 = g12->imag;
335 gamma23 = g23->real;
336 sigma23 = g23->imag;
337
338 is_ident12 = ( gamma12 == one && sigma12 == zero );
339 is_ident23 = ( gamma23 == one && sigma23 == zero );
340
341 if ( !is_ident12 && is_ident23 )
342 {
343 // Apply only to columns 1 and 2.
344
346 &gamma12,
347 &sigma12,
348 a1, 1,
349 a2, 1 );
350 }
351 else if ( is_ident12 && !is_ident23 )
352 {
353 // Apply only to columns 2 and 3.
354
356 &gamma23,
357 &sigma23,
358 a2, 1,
359 a3, 1 );
360 }
361 else if ( !is_ident12 && !is_ident23 )
362 {
363 // Apply to all three columns.
364
366 &gamma12,
367 &sigma12,
368 &gamma23,
369 &sigma23,
370 a1, 1,
371 a2, 1,
372 a3, 1 );
373 }
374 }
375 }
376
377 return FLA_SUCCESS;
378}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bls_var9().

◆ FLA_Apply_G_rf_ass_var9b()

FLA_Error FLA_Apply_G_rf_ass_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A 
)
125{
127
128 return FLA_SUCCESS;
129}

References i.

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bls_var9b().

◆ FLA_Apply_G_rf_asz_var1()

FLA_Error FLA_Apply_G_rf_asz_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
272{
273 double one = bl1_d1();
274 double zero = bl1_d0();
275 int nG_app = n_A - 1;
276 int l, j;
277 double gamma;
278 double sigma;
279 dcomplex* a1;
280 dcomplex* a2;
281 dcomplex* g1;
282 dcomplex* g11;
283
284 g1 = buff_G;
285
286 for ( l = 0; l < k_G; ++l )
287 {
288 a1 = buff_A;
289 a2 = buff_A + cs_A;
290 g11 = g1;
291
292 for ( j = 0; j < nG_app; ++j )
293 {
294 gamma = g11->real;
295 sigma = g11->imag;
296
297 // Skip the current iteration if the rotation is identity.
298 if ( gamma != one || sigma != zero )
299 {
301 &gamma,
302 &sigma,
303 a1, 1,
304 a2, 1 );
305 }
306
307 a1 += cs_A;
308 a2 += cs_A;
309 g11 += rs_G;
310 }
311
312 g1 += cs_G;
313 }
314
315 return FLA_SUCCESS;
316}

References bl1_d0(), bl1_d1(), i, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_asz_var2(), FLA_Apply_G_rf_asz_var3(), FLA_Apply_G_rf_asz_var6(), FLA_Apply_G_rf_asz_var9(), and FLA_Apply_G_rf_blz_var1().

◆ FLA_Apply_G_rf_asz_var2()

FLA_Error FLA_Apply_G_rf_asz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
457{
458 double one = bl1_d1();
459 double zero = bl1_d0();
460 double gamma;
461 double sigma;
462 dcomplex* a1;
463 dcomplex* a2;
464 dcomplex* g11;
465 int j, g, k;
466 int nG, nG_app;
467 int k_minus_1;
468
469 k_minus_1 = k_G - 1;
470 nG = n_A - 1;
471
472 // Use the simple variant for nG < (k - 1) or k == 1.
473 if ( nG < k_minus_1 || k_G == 1 )
474 {
476 m_A,
477 n_A,
478 buff_G, rs_G, cs_G,
479 buff_A, rs_A, cs_A );
480 return FLA_SUCCESS;
481 }
482
483
484 // Start-up phase.
485
486 for ( j = 0; j < k_minus_1; ++j )
487 {
488 nG_app = j + 1;
489
490 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
491 {
492 g11 = buff_G + (g )*rs_G + (k )*cs_G;
493 a1 = buff_A + (g )*cs_A;
494 a2 = buff_A + (g + 1)*cs_A;
495
496 gamma = g11->real;
497 sigma = g11->imag;
498
499 // Skip the current iteration if the rotation is identity.
500 if ( gamma == one && sigma == zero ) continue;
501
503 &gamma,
504 &sigma,
505 a1, 1,
506 a2, 1 );
507 }
508 }
509
510 // Pipeline stage
511
512 for ( j = k_minus_1; j < nG; ++j )
513 {
514 nG_app = k_G;
515
516 for ( k = 0, g = j; k < nG_app; ++k, --g )
517 {
518 g11 = buff_G + (g )*rs_G + (k )*cs_G;
519 a1 = buff_A + (g )*cs_A;
520 a2 = buff_A + (g + 1)*cs_A;
521
522 gamma = g11->real;
523 sigma = g11->imag;
524
525 // Skip the current iteration if the rotation is identity.
526 if ( gamma == one && sigma == zero ) continue;
527
529 &gamma,
530 &sigma,
531 a1, 1,
532 a2, 1 );
533 }
534 }
535
536 // Shutdown stage
537
538 for ( j = nG - k_minus_1; j < nG; ++j )
539 {
540 nG_app = nG - j;
541
542 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
543 {
544 g11 = buff_G + (g )*rs_G + (k )*cs_G;
545 a1 = buff_A + (g )*cs_A;
546 a2 = buff_A + (g + 1)*cs_A;
547
548 gamma = g11->real;
549 sigma = g11->imag;
550
551 // Skip the current iteration if the rotation is identity.
552 if ( gamma == one && sigma == zero ) continue;
553
555 &gamma,
556 &sigma,
557 a1, 1,
558 a2, 1 );
559 }
560 }
561
562 return FLA_SUCCESS;
563}
FLA_Error FLA_Apply_G_rf_asz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), and i.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blz_var2().

◆ FLA_Apply_G_rf_asz_var3()

FLA_Error FLA_Apply_G_rf_asz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
1478{
1479 double one = bl1_d1();
1480 double zero = bl1_d0();
1481 double gamma23_k1;
1482 double sigma23_k1;
1483 double gamma34_k1;
1484 double sigma34_k1;
1485 double gamma12_k2;
1486 double sigma12_k2;
1487 double gamma23_k2;
1488 double sigma23_k2;
1489 dcomplex* a1;
1490 dcomplex* a2;
1491 dcomplex* a3;
1492 dcomplex* a4;
1497 int i, j, g, k;
1498 int nG, nG_app;
1499 int n_iter;
1500 int n_left;
1501 int k_minus_1;
1502 int n_fuse;
1503 int k_fuse;
1506 int has_ident;
1507
1508 k_minus_1 = k_G - 1;
1509 nG = n_A - 1;
1510 n_fuse = 2;
1511 k_fuse = 2;
1512
1513 // Use the simple variant for nG < 2(k - 1) or k == 1.
1514 if ( nG < 2*k_minus_1 || k_G == 1 )
1515 {
1517 m_A,
1518 n_A,
1519 buff_G, rs_G, cs_G,
1520 buff_A, rs_A, cs_A );
1521 return FLA_SUCCESS;
1522 }
1523
1524
1525 // Start-up phase.
1526
1527 for ( j = -1; j < k_minus_1; j += n_fuse )
1528 {
1529 nG_app = j + 2;
1530 n_iter = nG_app / k_fuse;
1531 //n_iter = nG_app % k_fuse;
1532 n_left = 1;
1533
1534 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1535 {
1536 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1537 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1538 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1539 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1540 a1 = buff_A + (g - 1)*cs_A;
1541 a2 = buff_A + (g )*cs_A;
1542 a3 = buff_A + (g + 1)*cs_A;
1543 a4 = buff_A + (g + 2)*cs_A;
1544
1545 gamma23_k1 = g23_k1->real;
1546 sigma23_k1 = g23_k1->imag;
1547 gamma34_k1 = g34_k1->real;
1548 sigma34_k1 = g34_k1->imag;
1549 gamma12_k2 = g12_k2->real;
1550 sigma12_k2 = g12_k2->imag;
1551 gamma23_k2 = g23_k2->real;
1552 sigma23_k2 = g23_k2->imag;
1553
1560
1561 if ( has_ident )
1562 {
1563 // Apply to pairs of columns as needed.
1564
1565 if ( !is_ident23_k1 )
1567 &gamma23_k1,
1568 &sigma23_k1,
1569 a2, 1,
1570 a3, 1 );
1571
1572 if ( !is_ident34_k1 )
1574 &gamma34_k1,
1575 &sigma34_k1,
1576 a3, 1,
1577 a4, 1 );
1578
1579 if ( !is_ident12_k2 )
1581 &gamma12_k2,
1582 &sigma12_k2,
1583 a1, 1,
1584 a2, 1 );
1585
1586 if ( !is_ident23_k2 )
1588 &gamma23_k2,
1589 &sigma23_k2,
1590 a2, 1,
1591 a3, 1 );
1592 }
1593 else
1594 {
1595 // Apply to all four columns.
1596
1598 &gamma23_k1,
1599 &sigma23_k1,
1600 &gamma34_k1,
1601 &sigma34_k1,
1602 &gamma12_k2,
1603 &sigma12_k2,
1604 &gamma23_k2,
1605 &sigma23_k2,
1606 a1, 1,
1607 a2, 1,
1608 a3, 1,
1609 a4, 1 );
1610 }
1611 }
1612
1613 if ( n_left == 1 )
1614 {
1615 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1616 a3 = buff_A + (g + 1)*cs_A;
1617 a4 = buff_A + (g + 2)*cs_A;
1618
1619 gamma34_k1 = g34_k1->real;
1620 sigma34_k1 = g34_k1->imag;
1621
1623
1624 if ( !is_ident34_k1 )
1626 &gamma34_k1,
1627 &sigma34_k1,
1628 a3, 1,
1629 a4, 1 );
1630 }
1631 }
1632
1633 // Pipeline stage
1634
1635 for ( ; j < nG - 1; j += n_fuse )
1636 {
1637 nG_app = k_G;
1638 n_iter = nG_app / k_fuse;
1639 n_left = nG_app % k_fuse;
1640
1641 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1642 {
1643 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1644 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1645 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1646 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1647 a1 = buff_A + (g - 1)*cs_A;
1648 a2 = buff_A + (g )*cs_A;
1649 a3 = buff_A + (g + 1)*cs_A;
1650 a4 = buff_A + (g + 2)*cs_A;
1651
1652 gamma23_k1 = g23_k1->real;
1653 sigma23_k1 = g23_k1->imag;
1654 gamma34_k1 = g34_k1->real;
1655 sigma34_k1 = g34_k1->imag;
1656 gamma12_k2 = g12_k2->real;
1657 sigma12_k2 = g12_k2->imag;
1658 gamma23_k2 = g23_k2->real;
1659 sigma23_k2 = g23_k2->imag;
1660
1667
1668 if ( has_ident )
1669 {
1670 // Apply to pairs of columns as needed.
1671
1672 if ( !is_ident23_k1 )
1674 &gamma23_k1,
1675 &sigma23_k1,
1676 a2, 1,
1677 a3, 1 );
1678
1679 if ( !is_ident34_k1 )
1681 &gamma34_k1,
1682 &sigma34_k1,
1683 a3, 1,
1684 a4, 1 );
1685
1686 if ( !is_ident12_k2 )
1688 &gamma12_k2,
1689 &sigma12_k2,
1690 a1, 1,
1691 a2, 1 );
1692
1693 if ( !is_ident23_k2 )
1695 &gamma23_k2,
1696 &sigma23_k2,
1697 a2, 1,
1698 a3, 1 );
1699 }
1700 else
1701 {
1702 // Apply to all four columns.
1703
1705 &gamma23_k1,
1706 &sigma23_k1,
1707 &gamma34_k1,
1708 &sigma34_k1,
1709 &gamma12_k2,
1710 &sigma12_k2,
1711 &gamma23_k2,
1712 &sigma23_k2,
1713 a1, 1,
1714 a2, 1,
1715 a3, 1,
1716 a4, 1 );
1717 }
1718 }
1719
1720 if ( n_left == 1 )
1721 {
1722 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1723 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1724 a2 = buff_A + (g )*cs_A;
1725 a3 = buff_A + (g + 1)*cs_A;
1726 a4 = buff_A + (g + 2)*cs_A;
1727
1728 gamma23_k1 = g23_k1->real;
1729 sigma23_k1 = g23_k1->imag;
1730 gamma34_k1 = g34_k1->real;
1731 sigma34_k1 = g34_k1->imag;
1732
1735
1736 if ( !is_ident23_k1 && is_ident34_k1 )
1737 {
1739 &gamma23_k1,
1740 &sigma23_k1,
1741 a2, 1,
1742 a3, 1 );
1743 }
1744 else if ( is_ident23_k1 && !is_ident34_k1 )
1745 {
1747 &gamma34_k1,
1748 &sigma34_k1,
1749 a3, 1,
1750 a4, 1 );
1751 }
1752 else
1753 {
1755 &gamma23_k1,
1756 &sigma23_k1,
1757 &gamma34_k1,
1758 &sigma34_k1,
1759 a2, 1,
1760 a3, 1,
1761 a4, 1 );
1762 }
1763 }
1764 }
1765
1766 // Shutdown stage
1767
1768 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1769 {
1770 g = nG - 1;
1771 k = j;
1772
1773 //n_left = 1;
1774 //if ( n_left == 1 )
1775 {
1776 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1777 a2 = buff_A + (g )*cs_A;
1778 a3 = buff_A + (g + 1)*cs_A;
1779
1780 gamma23_k1 = g23_k1->real;
1781 sigma23_k1 = g23_k1->imag;
1782
1784
1785 if ( !is_ident23_k1 )
1787 &gamma23_k1,
1788 &sigma23_k1,
1789 a2, 1,
1790 a3, 1 );
1791 ++k;
1792 --g;
1793 }
1794
1795 nG_app = k_minus_1 - j;
1796 n_iter = nG_app / k_fuse;
1797 n_left = nG_app % k_fuse;
1798
1799 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1800 {
1801 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1802 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1803 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1804 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1805 a1 = buff_A + (g - 1)*cs_A;
1806 a2 = buff_A + (g )*cs_A;
1807 a3 = buff_A + (g + 1)*cs_A;
1808 a4 = buff_A + (g + 2)*cs_A;
1809
1810 gamma23_k1 = g23_k1->real;
1811 sigma23_k1 = g23_k1->imag;
1812 gamma34_k1 = g34_k1->real;
1813 sigma34_k1 = g34_k1->imag;
1814 gamma12_k2 = g12_k2->real;
1815 sigma12_k2 = g12_k2->imag;
1816 gamma23_k2 = g23_k2->real;
1817 sigma23_k2 = g23_k2->imag;
1818
1825
1826 if ( has_ident )
1827 {
1828 // Apply to pairs of columns as needed.
1829
1830 if ( !is_ident23_k1 )
1832 &gamma23_k1,
1833 &sigma23_k1,
1834 a2, 1,
1835 a3, 1 );
1836
1837 if ( !is_ident34_k1 )
1839 &gamma34_k1,
1840 &sigma34_k1,
1841 a3, 1,
1842 a4, 1 );
1843
1844 if ( !is_ident12_k2 )
1846 &gamma12_k2,
1847 &sigma12_k2,
1848 a1, 1,
1849 a2, 1 );
1850
1851 if ( !is_ident23_k2 )
1853 &gamma23_k2,
1854 &sigma23_k2,
1855 a2, 1,
1856 a3, 1 );
1857 }
1858 else
1859 {
1860 // Apply to all four columns.
1861
1863 &gamma23_k1,
1864 &sigma23_k1,
1865 &gamma34_k1,
1866 &sigma34_k1,
1867 &gamma12_k2,
1868 &sigma12_k2,
1869 &gamma23_k2,
1870 &sigma23_k2,
1871 a1, 1,
1872 a2, 1,
1873 a3, 1,
1874 a4, 1 );
1875 }
1876 }
1877
1878 if ( n_left == 1 )
1879 {
1880 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1881 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1882 a2 = buff_A + (g )*cs_A;
1883 a3 = buff_A + (g + 1)*cs_A;
1884 a4 = buff_A + (g + 2)*cs_A;
1885
1886 gamma23_k1 = g23_k1->real;
1887 sigma23_k1 = g23_k1->imag;
1888 gamma34_k1 = g34_k1->real;
1889 sigma34_k1 = g34_k1->imag;
1890
1893
1894 if ( !is_ident23_k1 && is_ident34_k1 )
1895 {
1897 &gamma23_k1,
1898 &sigma23_k1,
1899 a2, 1,
1900 a3, 1 );
1901 }
1902 else if ( is_ident23_k1 && !is_ident34_k1 )
1903 {
1905 &gamma34_k1,
1906 &sigma34_k1,
1907 a3, 1,
1908 a4, 1 );
1909 }
1910 else
1911 {
1913 &gamma23_k1,
1914 &sigma23_k1,
1915 &gamma34_k1,
1916 &sigma34_k1,
1917 a2, 1,
1918 a3, 1,
1919 a4, 1 );
1920 }
1921 }
1922 }
1923
1924 return FLA_SUCCESS;
1925}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_asz_var3b()

FLA_Error FLA_Apply_G_rf_asz_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
627{
629
630 return FLA_SUCCESS;
631}

References i.

Referenced by FLA_Apply_G_rf_asm_var3b().

◆ FLA_Apply_G_rf_asz_var4()

FLA_Error FLA_Apply_G_rf_asz_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var5()

FLA_Error FLA_Apply_G_rf_asz_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var5b()

FLA_Error FLA_Apply_G_rf_asz_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var6()

FLA_Error FLA_Apply_G_rf_asz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
961{
962 double one = bl1_d1();
963 double zero = bl1_d0();
964 double gamma12;
965 double sigma12;
966 double gamma23;
967 double sigma23;
968 dcomplex* a1;
969 dcomplex* a2;
970 dcomplex* a3;
971 dcomplex* g12;
972 dcomplex* g23;
973 int i, j, g, k;
974 int nG, nG_app;
975 int n_iter;
976 int n_left;
977 int k_minus_1;
978 int n_fuse;
980
981 k_minus_1 = k_G - 1;
982 nG = n_A - 1;
983 n_fuse = 2;
984
985 // Use the simple variant for nG < (k - 1) or k == 1.
986 if ( nG < k_minus_1 || k_G == 1 )
987 {
989 m_A,
990 n_A,
991 buff_G, rs_G, cs_G,
992 buff_A, rs_A, cs_A );
993 return FLA_SUCCESS;
994 }
995
996
997 // Start-up phase.
998
999 for ( j = 0; j < k_minus_1; ++j )
1000 {
1001 nG_app = j + 1;
1002 n_iter = nG_app / n_fuse;
1003 n_left = nG_app % n_fuse;
1004
1005 //for ( k = 0, g = nG_app - 1; k < nG_app; k += n_fuse, g -= n_fuse )
1006 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1007 {
1008 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1009 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1010 a1 = buff_A + (g - 1)*cs_A;
1011 a2 = buff_A + (g )*cs_A;
1012 a3 = buff_A + (g + 1)*cs_A;
1013
1014 gamma12 = g12->real;
1015 sigma12 = g12->imag;
1016 gamma23 = g23->real;
1017 sigma23 = g23->imag;
1018
1019 is_ident12 = ( gamma12 == one && sigma12 == zero );
1020 is_ident23 = ( gamma23 == one && sigma23 == zero );
1021
1022 if ( !is_ident12 && is_ident23 )
1023 {
1024 // Apply only to columns 1 and 2.
1025
1027 &gamma12,
1028 &sigma12,
1029 a1, 1,
1030 a2, 1 );
1031 }
1032 else if ( is_ident12 && !is_ident23 )
1033 {
1034 // Apply only to columns 2 and 3.
1035
1037 &gamma23,
1038 &sigma23,
1039 a2, 1,
1040 a3, 1 );
1041 }
1042 else if ( !is_ident12 && !is_ident23 )
1043 {
1044 // Apply to all three columns.
1045
1047 &gamma12,
1048 &sigma12,
1049 &gamma23,
1050 &sigma23,
1051 a1, 1,
1052 a2, 1,
1053 a3, 1 );
1054 }
1055 }
1056 //for ( k = 0; k < n_left; k += 1, g -= 1 )
1057 if ( n_left == 1 )
1058 {
1059 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1060 a2 = buff_A + (g )*cs_A;
1061 a3 = buff_A + (g + 1)*cs_A;
1062
1063 gamma23 = g23->real;
1064 sigma23 = g23->imag;
1065
1066 is_ident23 = ( gamma23 == one && sigma23 == zero );
1067
1068 if ( !is_ident23 )
1070 &gamma23,
1071 &sigma23,
1072 a2, 1,
1073 a3, 1 );
1074 }
1075 }
1076
1077 // Pipeline stage
1078
1079 for ( j = k_minus_1; j < nG; ++j )
1080 {
1081 nG_app = k_G;
1082 n_iter = nG_app / n_fuse;
1083 n_left = nG_app % n_fuse;
1084
1085 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1086 {
1087 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1088 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1089 a1 = buff_A + (g - 1)*cs_A;
1090 a2 = buff_A + (g )*cs_A;
1091 a3 = buff_A + (g + 1)*cs_A;
1092
1093 gamma12 = g12->real;
1094 sigma12 = g12->imag;
1095 gamma23 = g23->real;
1096 sigma23 = g23->imag;
1097
1098 is_ident12 = ( gamma12 == one && sigma12 == zero );
1099 is_ident23 = ( gamma23 == one && sigma23 == zero );
1100
1101 if ( !is_ident12 && is_ident23 )
1102 {
1103 // Apply only to columns 1 and 2.
1104
1106 &gamma12,
1107 &sigma12,
1108 a1, 1,
1109 a2, 1 );
1110 }
1111 else if ( is_ident12 && !is_ident23 )
1112 {
1113 // Apply only to columns 2 and 3.
1114
1116 &gamma23,
1117 &sigma23,
1118 a2, 1,
1119 a3, 1 );
1120 }
1121 else if ( !is_ident12 && !is_ident23 )
1122 {
1123 // Apply to all three columns.
1124
1126 &gamma12,
1127 &sigma12,
1128 &gamma23,
1129 &sigma23,
1130 a1, 1,
1131 a2, 1,
1132 a3, 1 );
1133 }
1134 }
1135 //for ( k = 0; k < n_left; k += 1, g -= 1 )
1136 if ( n_left == 1 )
1137 {
1138 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1139 a2 = buff_A + (g )*cs_A;
1140 a3 = buff_A + (g + 1)*cs_A;
1141
1142 gamma23 = g23->real;
1143 sigma23 = g23->imag;
1144
1145 is_ident23 = ( gamma23 == one && sigma23 == zero );
1146
1147 if ( !is_ident23 )
1149 &gamma23,
1150 &sigma23,
1151 a2, 1,
1152 a3, 1 );
1153 }
1154 }
1155
1156 // Shutdown stage
1157
1158 for ( j = 1; j < k_G; ++j )
1159 {
1160 nG_app = k_G - j;
1161 n_iter = nG_app / n_fuse;
1162 n_left = nG_app % n_fuse;
1163
1164 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1165 {
1166 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1167 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1168 a1 = buff_A + (g - 1)*cs_A;
1169 a2 = buff_A + (g )*cs_A;
1170 a3 = buff_A + (g + 1)*cs_A;
1171
1172 gamma12 = g12->real;
1173 sigma12 = g12->imag;
1174 gamma23 = g23->real;
1175 sigma23 = g23->imag;
1176
1177 is_ident12 = ( gamma12 == one && sigma12 == zero );
1178 is_ident23 = ( gamma23 == one && sigma23 == zero );
1179
1180 if ( !is_ident12 && is_ident23 )
1181 {
1182 // Apply only to columns 1 and 2.
1183
1185 &gamma12,
1186 &sigma12,
1187 a1, 1,
1188 a2, 1 );
1189 }
1190 else if ( is_ident12 && !is_ident23 )
1191 {
1192 // Apply only to columns 2 and 3.
1193
1195 &gamma23,
1196 &sigma23,
1197 a2, 1,
1198 a3, 1 );
1199 }
1200 else if ( !is_ident12 && !is_ident23 )
1201 {
1202 // Apply to all three columns.
1203
1205 &gamma12,
1206 &sigma12,
1207 &gamma23,
1208 &sigma23,
1209 a1, 1,
1210 a2, 1,
1211 a3, 1 );
1212 }
1213 }
1214 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
1215 if ( n_left == 1 )
1216 {
1217 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1218 a2 = buff_A + (g )*cs_A;
1219 a3 = buff_A + (g + 1)*cs_A;
1220
1221 gamma23 = g23->real;
1222 sigma23 = g23->imag;
1223
1224 is_ident23 = ( gamma23 == one && sigma23 == zero );
1225
1226 if ( !is_ident23 )
1228 &gamma23,
1229 &sigma23,
1230 a2, 1,
1231 a3, 1 );
1232 }
1233 }
1234
1235 return FLA_SUCCESS;
1236}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blz_var6().

◆ FLA_Apply_G_rf_asz_var6b()

FLA_Error FLA_Apply_G_rf_asz_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
450{
452
453 return FLA_SUCCESS;
454}

References i.

Referenced by FLA_Apply_G_rf_asm_var6b().

◆ FLA_Apply_G_rf_asz_var7()

FLA_Error FLA_Apply_G_rf_asz_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var8()

FLA_Error FLA_Apply_G_rf_asz_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var8b()

FLA_Error FLA_Apply_G_rf_asz_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var9()

FLA_Error FLA_Apply_G_rf_asz_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
919{
920 double one = bl1_d1();
921 double zero = bl1_d0();
922 double gamma12;
923 double sigma12;
924 double gamma23;
925 double sigma23;
926 dcomplex* a1;
927 dcomplex* a2;
928 dcomplex* a3;
929 dcomplex* g12;
930 dcomplex* g23;
931 int i, j, g, k;
932 int nG, nG_app;
933 int n_iter;
934 int n_left;
935 int k_minus_1;
936 int n_fuse;
938
939 k_minus_1 = k_G - 1;
940 nG = n_A - 1;
941 n_fuse = 2;
942
943 // Use the simple variant for nG < 2(k - 1) or k == 1.
944 if ( nG < 2*k_minus_1 || k_G == 1 )
945 {
947 m_A,
948 n_A,
949 buff_G, rs_G, cs_G,
950 buff_A, rs_A, cs_A );
951 return FLA_SUCCESS;
952 }
953
954
955 // Start-up phase.
956
957 for ( j = -1; j < k_minus_1; j += n_fuse )
958 {
959 nG_app = j + 1;
960 n_iter = nG_app;
961 n_left = 1;
962
963 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
964 {
965 g12 = buff_G + (g )*rs_G + (k )*cs_G;
966 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
967 a1 = buff_A + (g )*cs_A;
968 a2 = buff_A + (g + 1)*cs_A;
969 a3 = buff_A + (g + 2)*cs_A;
970
971 gamma12 = g12->real;
972 sigma12 = g12->imag;
973 gamma23 = g23->real;
974 sigma23 = g23->imag;
975
976 is_ident12 = ( gamma12 == one && sigma12 == zero );
977 is_ident23 = ( gamma23 == one && sigma23 == zero );
978
979 if ( !is_ident12 && is_ident23 )
980 {
981 // Apply only to columns 1 and 2.
982
984 &gamma12,
985 &sigma12,
986 a1, 1,
987 a2, 1 );
988 }
989 else if ( is_ident12 && !is_ident23 )
990 {
991 // Apply only to columns 2 and 3.
992
994 &gamma23,
995 &sigma23,
996 a2, 1,
997 a3, 1 );
998 }
999 else if ( !is_ident12 && !is_ident23 )
1000 {
1001 // Apply to all three columns.
1002
1004 &gamma12,
1005 &sigma12,
1006 &gamma23,
1007 &sigma23,
1008 a1, 1,
1009 a2, 1,
1010 a3, 1 );
1011 }
1012 }
1013
1014 if ( n_left == 1 )
1015 {
1016 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1017 a2 = buff_A + (g + 1)*cs_A;
1018 a3 = buff_A + (g + 2)*cs_A;
1019
1020 gamma23 = g23->real;
1021 sigma23 = g23->imag;
1022
1023 is_ident23 = ( gamma23 == one && sigma23 == zero );
1024
1025 if ( !is_ident23 )
1027 &gamma23,
1028 &sigma23,
1029 a2, 1,
1030 a3, 1 );
1031 }
1032 }
1033
1034 // Pipeline stage
1035
1036 for ( ; j < nG - 1; j += n_fuse )
1037 {
1038 nG_app = k_G;
1039 n_iter = nG_app;
1040 n_left = 0;
1041
1042 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
1043 {
1044 g12 = buff_G + (g )*rs_G + (k )*cs_G;
1045 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1046 a1 = buff_A + (g )*cs_A;
1047 a2 = buff_A + (g + 1)*cs_A;
1048 a3 = buff_A + (g + 2)*cs_A;
1049
1050 gamma12 = g12->real;
1051 sigma12 = g12->imag;
1052 gamma23 = g23->real;
1053 sigma23 = g23->imag;
1054
1055 is_ident12 = ( gamma12 == one && sigma12 == zero );
1056 is_ident23 = ( gamma23 == one && sigma23 == zero );
1057
1058 if ( !is_ident12 && is_ident23 )
1059 {
1060 // Apply only to columns 1 and 2.
1061
1063 &gamma12,
1064 &sigma12,
1065 a1, 1,
1066 a2, 1 );
1067 }
1068 else if ( is_ident12 && !is_ident23 )
1069 {
1070 // Apply only to columns 2 and 3.
1071
1073 &gamma23,
1074 &sigma23,
1075 a2, 1,
1076 a3, 1 );
1077 }
1078 else if ( !is_ident12 && !is_ident23 )
1079 {
1080 // Apply to all three columns.
1081
1083 &gamma12,
1084 &sigma12,
1085 &gamma23,
1086 &sigma23,
1087 a1, 1,
1088 a2, 1,
1089 a3, 1 );
1090 }
1091 }
1092 }
1093
1094 // Shutdown stage
1095
1096 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1097 {
1098 g = nG - 1;
1099 k = j;
1100
1101 n_left = 1;
1102 if ( n_left == 1 )
1103 {
1104 g12 = buff_G + (g )*rs_G + (k )*cs_G;
1105 a1 = buff_A + (g )*cs_A;
1106 a2 = buff_A + (g + 1)*cs_A;
1107
1108 gamma12 = g12->real;
1109 sigma12 = g12->imag;
1110
1111 is_ident12 = ( gamma12 == one && sigma12 == zero );
1112
1113 if ( !is_ident12 )
1115 &gamma12,
1116 &sigma12,
1117 a1, 1,
1118 a2, 1 );
1119 ++k;
1120 --g;
1121 }
1122
1123 nG_app = k_minus_1 - j;
1124 n_iter = nG_app;
1125
1126 for ( i = 0; i < n_iter; ++i, ++k, --g )
1127 {
1128 g12 = buff_G + (g )*rs_G + (k )*cs_G;
1129 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1130 a1 = buff_A + (g )*cs_A;
1131 a2 = buff_A + (g + 1)*cs_A;
1132 a3 = buff_A + (g + 2)*cs_A;
1133
1134 gamma12 = g12->real;
1135 sigma12 = g12->imag;
1136 gamma23 = g23->real;
1137 sigma23 = g23->imag;
1138
1139 is_ident12 = ( gamma12 == one && sigma12 == zero );
1140 is_ident23 = ( gamma23 == one && sigma23 == zero );
1141
1142 if ( !is_ident12 && is_ident23 )
1143 {
1144 // Apply only to columns 1 and 2.
1145
1147 &gamma12,
1148 &sigma12,
1149 a1, 1,
1150 a2, 1 );
1151 }
1152 else if ( is_ident12 && !is_ident23 )
1153 {
1154 // Apply only to columns 2 and 3.
1155
1157 &gamma23,
1158 &sigma23,
1159 a2, 1,
1160 a3, 1 );
1161 }
1162 else if ( !is_ident12 && !is_ident23 )
1163 {
1164 // Apply to all three columns.
1165
1167 &gamma12,
1168 &sigma12,
1169 &gamma23,
1170 &sigma23,
1171 a1, 1,
1172 a2, 1,
1173 a3, 1 );
1174 }
1175 }
1176 }
1177
1178 return FLA_SUCCESS;
1179}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blz_var9().

◆ FLA_Apply_G_rf_asz_var9b()

FLA_Error FLA_Apply_G_rf_asz_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
435{
437
438 return FLA_SUCCESS;
439}

References i.

Referenced by FLA_Apply_G_rf_asm_var9b().

◆ FLA_Apply_G_rf_bhc_var3()

FLA_Error FLA_Apply_G_rf_bhc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bhd_var3()

FLA_Error FLA_Apply_G_rf_bhd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bhs_var3()

FLA_Error FLA_Apply_G_rf_bhs_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bhz_var3()

FLA_Error FLA_Apply_G_rf_bhz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var1()

FLA_Error FLA_Apply_G_rf_blc_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161{
162 int i;
163 int b = 0;
164
165 for ( i = 0; i < m_A; i += b )
166 {
167 scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168 int m_ahead = max( 0, m_A - i );
169
170 b = min( b_alg, m_ahead );
171
172 //FLA_Apply_G_rf_opc_var1( k_G,
173 FLA_Apply_G_rf_asc_var1( k_G,
174 b,
175 n_A,
176 buff_G, rs_G, cs_G,
177 A1, rs_A, cs_A );
178 }
179
180 return FLA_SUCCESS;
181}

References FLA_Apply_G_rf_asc_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().

◆ FLA_Apply_G_rf_blc_var2()

FLA_Error FLA_Apply_G_rf_blc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161{
162 int i;
163 int b = 0;
164
165 for ( i = 0; i < m_A; i += b )
166 {
167 scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168 int m_ahead = max( 0, m_A - i );
169
170 b = min( b_alg, m_ahead );
171
172 //FLA_Apply_G_rf_opc_var2( k_G,
173 FLA_Apply_G_rf_asc_var2( k_G,
174 b,
175 n_A,
176 buff_G, rs_G, cs_G,
177 A1, rs_A, cs_A );
178 }
179
180 return FLA_SUCCESS;
181}
FLA_Error FLA_Apply_G_rf_asc_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:339

References FLA_Apply_G_rf_asc_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_blc_var3()

FLA_Error FLA_Apply_G_rf_blc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
163{
164 int i;
165 int b = 0;
166
167 for ( i = 0; i < m_A; i += b )
168 {
169 scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
170 int m_ahead = max( 0, m_A - i );
171
172 b = min( b_alg, m_ahead );
173
174 // ass_var3 does not support arbitrary strides: TODO
175 FLA_Apply_G_rf_opc_var3( k_G,
176 //FLA_Apply_G_rf_asc_var3( k_G,
177 b,
178 n_A,
179 buff_G, rs_G, cs_G,
180 A1, rs_A, cs_A );
181 }
182
183 return FLA_SUCCESS;
184}
FLA_Error FLA_Apply_G_rf_opc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:1018

References FLA_Apply_G_rf_opc_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_opc_var1(), and FLA_Bsvd_v_opc_var1().

◆ FLA_Apply_G_rf_blc_var3b()

FLA_Error FLA_Apply_G_rf_blc_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
174{
176
177 return FLA_SUCCESS;
178}

References i.

Referenced by FLA_Apply_G_rf_blk_var3b().

◆ FLA_Apply_G_rf_blc_var4()

FLA_Error FLA_Apply_G_rf_blc_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var5()

FLA_Error FLA_Apply_G_rf_blc_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var5b()

FLA_Error FLA_Apply_G_rf_blc_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var6()

FLA_Error FLA_Apply_G_rf_blc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161{
162 int i;
163 int b = 0;
164
165 for ( i = 0; i < m_A; i += b )
166 {
167 scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168 int m_ahead = max( 0, m_A - i );
169
170 b = min( b_alg, m_ahead );
171
172 //FLA_Apply_G_rf_opc_var6( k_G,
173 FLA_Apply_G_rf_asc_var6( k_G,
174 b,
175 n_A,
176 buff_G, rs_G, cs_G,
177 A1, rs_A, cs_A );
178 }
179
180 return FLA_SUCCESS;
181}
FLA_Error FLA_Apply_G_rf_asc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:675

References FLA_Apply_G_rf_asc_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_blc_var6b()

FLA_Error FLA_Apply_G_rf_blc_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
174{
176
177 return FLA_SUCCESS;
178}

References i.

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_blc_var7()

FLA_Error FLA_Apply_G_rf_blc_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var8()

FLA_Error FLA_Apply_G_rf_blc_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var8b()

FLA_Error FLA_Apply_G_rf_blc_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var9()

FLA_Error FLA_Apply_G_rf_blc_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161{
162 int i;
163 int b = 0;
164
165 for ( i = 0; i < m_A; i += b )
166 {
167 scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168 int m_ahead = max( 0, m_A - i );
169
170 b = min( b_alg, m_ahead );
171
172 //FLA_Apply_G_rf_opc_var9( k_G,
173 FLA_Apply_G_rf_asc_var9( k_G,
174 b,
175 n_A,
176 buff_G, rs_G, cs_G,
177 A1, rs_A, cs_A );
178 }
179
180 return FLA_SUCCESS;
181}
FLA_Error FLA_Apply_G_rf_asc_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:647

References FLA_Apply_G_rf_asc_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_blc_var9b()

FLA_Error FLA_Apply_G_rf_blc_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
174{
176
177 return FLA_SUCCESS;
178}

References i.

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_bld_var1()

FLA_Error FLA_Apply_G_rf_bld_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133{
134 int i;
135 int b = 0;
136
137 for ( i = 0; i < m_A; i += b )
138 {
139 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140 int m_ahead = max( 0, m_A - i );
141
142 b = min( b_alg, m_ahead );
143
144 //FLA_Apply_G_rf_opd_var1( k_G,
145 FLA_Apply_G_rf_asd_var1( k_G,
146 b,
147 n_A,
148 buff_G, rs_G, cs_G,
149 A1, rs_A, cs_A );
150 }
151
152 return FLA_SUCCESS;
153}

References FLA_Apply_G_rf_asd_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().

◆ FLA_Apply_G_rf_bld_var2()

FLA_Error FLA_Apply_G_rf_bld_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133{
134 int i;
135 int b = 0;
136
137 for ( i = 0; i < m_A; i += b )
138 {
139 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140 int m_ahead = max( 0, m_A - i );
141
142 b = min( b_alg, m_ahead );
143
144 //FLA_Apply_G_rf_opd_var2( k_G,
145 FLA_Apply_G_rf_asd_var2( k_G,
146 b,
147 n_A,
148 buff_G, rs_G, cs_G,
149 A1, rs_A, cs_A );
150 }
151
152 return FLA_SUCCESS;
153}
FLA_Error FLA_Apply_G_rf_asd_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:226

References FLA_Apply_G_rf_asd_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_bld_var3()

FLA_Error FLA_Apply_G_rf_bld_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
134{
135 int i;
136 int b = 0;
137
138 for ( i = 0; i < m_A; i += b )
139 {
140 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
141 int m_ahead = max( 0, m_A - i );
142
143 b = min( b_alg, m_ahead );
144
145 // ass_var3 does not support arbitrary strides: TODO
146 FLA_Apply_G_rf_opd_var3( k_G,
147 //FLA_Apply_G_rf_asd_var3( k_G,
148 b,
149 n_A,
150 buff_G, rs_G, cs_G,
151 A1, rs_A, cs_A );
152 }
153
154 return FLA_SUCCESS;
155}
FLA_Error FLA_Apply_G_rf_opd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:565

References FLA_Apply_G_rf_opd_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_v_opd_var1(), and FLA_Tevd_v_opd_var1().

◆ FLA_Apply_G_rf_bld_var3b()

FLA_Error FLA_Apply_G_rf_bld_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
142{
143 int i;
144 int b = 0;
145
146 for ( i = 0; i < m_A; i += b )
147 {
148 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
149 int m_behind = i;
150 int m_ahead = max( 0, m_A - i );
151
152 b = min( b_alg, m_ahead );
153
154 //FLA_Apply_G_rf_opd_var3b( k_G,
155 FLA_Apply_G_rf_asd_var3b( k_G,
156 b,
157 n_A,
158 i_k,
159 m_behind,
160 buff_G, rs_G, cs_G,
161 A1, rs_A, cs_A );
162 }
163
164 return FLA_SUCCESS;
165}
FLA_Error FLA_Apply_G_rf_asd_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:131

References FLA_Apply_G_rf_asd_var3b(), and i.

Referenced by FLA_Apply_G_rf_blk_var3b(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Tevd_v_opd_var2(), and FLA_Tevd_v_opz_var2().

◆ FLA_Apply_G_rf_bld_var4()

FLA_Error FLA_Apply_G_rf_bld_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var5()

FLA_Error FLA_Apply_G_rf_bld_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var5b()

FLA_Error FLA_Apply_G_rf_bld_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var6()

FLA_Error FLA_Apply_G_rf_bld_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133{
134 int i;
135 int b = 0;
136
137 for ( i = 0; i < m_A; i += b )
138 {
139 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140 int m_ahead = max( 0, m_A - i );
141
142 b = min( b_alg, m_ahead );
143
144 //FLA_Apply_G_rf_opd_var6( k_G,
145 FLA_Apply_G_rf_asd_var6( k_G,
146 b,
147 n_A,
148 buff_G, rs_G, cs_G,
149 A1, rs_A, cs_A );
150 }
151
152 return FLA_SUCCESS;
153}
FLA_Error FLA_Apply_G_rf_asd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:394

References FLA_Apply_G_rf_asd_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_bld_var6b()

FLA_Error FLA_Apply_G_rf_bld_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
142{
143 int i;
144 int b = 0;
145
146 for ( i = 0; i < m_A; i += b )
147 {
148 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
149 int m_behind = i;
150 int m_ahead = max( 0, m_A - i );
151
152 b = min( b_alg, m_ahead );
153
154 //FLA_Apply_G_rf_opd_var6b( k_G,
155 FLA_Apply_G_rf_asd_var6b( k_G,
156 b,
157 n_A,
158 i_k,
159 m_behind,
160 buff_G, rs_G, cs_G,
161 A1, rs_A, cs_A );
162 }
163
164 return FLA_SUCCESS;
165}
FLA_Error FLA_Apply_G_rf_asd_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6b.c:131

References FLA_Apply_G_rf_asd_var6b(), and i.

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_bld_var7()

FLA_Error FLA_Apply_G_rf_bld_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var8()

FLA_Error FLA_Apply_G_rf_bld_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var8b()

FLA_Error FLA_Apply_G_rf_bld_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var9()

FLA_Error FLA_Apply_G_rf_bld_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133{
134 int i;
135 int b = 0;
136
137 for ( i = 0; i < m_A; i += b )
138 {
139 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140 int m_ahead = max( 0, m_A - i );
141
142 b = min( b_alg, m_ahead );
143
144 //FLA_Apply_G_rf_opd_var9( k_G,
145 FLA_Apply_G_rf_asd_var9( k_G,
146 b,
147 n_A,
148 buff_G, rs_G, cs_G,
149 A1, rs_A, cs_A );
150 }
151
152 return FLA_SUCCESS;
153}
FLA_Error FLA_Apply_G_rf_asd_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:380

References FLA_Apply_G_rf_asd_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_bld_var9b()

FLA_Error FLA_Apply_G_rf_bld_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
142{
143 int i;
144 int b = 0;
145
146 for ( i = 0; i < m_A; i += b )
147 {
148 double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
149 int m_behind = i;
150 int m_ahead = max( 0, m_A - i );
151
152 b = min( b_alg, m_ahead );
153
154 //FLA_Apply_G_rf_opd_var9b( k_G,
155 FLA_Apply_G_rf_asd_var9b( k_G,
156 b,
157 n_A,
158 i_k,
159 m_behind,
160 buff_G, rs_G, cs_G,
161 A1, rs_A, cs_A );
162 }
163
164 return FLA_SUCCESS;
165}
FLA_Error FLA_Apply_G_rf_asd_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:131

References FLA_Apply_G_rf_asd_var9b(), and i.

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_blk_var1()

FLA_Error FLA_Apply_G_rf_blk_var1 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
44 b_alg );
45
46 break;
47 }
48
49 case FLA_DOUBLE:
50 {
52 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53
55 m_A,
56 n_A,
59 b_alg );
60
61 break;
62 }
63
64 case FLA_COMPLEX:
65 {
68
70 m_A,
71 n_A,
74 b_alg );
75
76 break;
77 }
78
80 {
83
85 m_A,
86 n_A,
89 b_alg );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96}
FLA_Error FLA_Apply_G_rf_blz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var1.c:183
FLA_Error FLA_Apply_G_rf_blc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var1.c:155
FLA_Error FLA_Apply_G_rf_bls_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var1.c:99
FLA_Error FLA_Apply_G_rf_bld_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var1.c:127

References FLA_Apply_G_rf_blc_var1(), FLA_Apply_G_rf_bld_var1(), FLA_Apply_G_rf_bls_var1(), FLA_Apply_G_rf_blz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_blk_var2()

FLA_Error FLA_Apply_G_rf_blk_var2 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
44 b_alg );
45
46 break;
47 }
48
49 case FLA_DOUBLE:
50 {
52 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53
55 m_A,
56 n_A,
59 b_alg );
60
61 break;
62 }
63
64 case FLA_COMPLEX:
65 {
68
70 m_A,
71 n_A,
74 b_alg );
75
76 break;
77 }
78
80 {
83
85 m_A,
86 n_A,
89 b_alg );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96}
FLA_Error FLA_Apply_G_rf_blz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var2.c:183
FLA_Error FLA_Apply_G_rf_bld_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var2.c:127
FLA_Error FLA_Apply_G_rf_blc_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var2.c:155
FLA_Error FLA_Apply_G_rf_bls_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var2.c:99

References FLA_Apply_G_rf_blc_var2(), FLA_Apply_G_rf_bld_var2(), FLA_Apply_G_rf_bls_var2(), FLA_Apply_G_rf_blz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_blk_var3()

FLA_Error FLA_Apply_G_rf_blk_var3 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
44 b_alg );
45
46 break;
47 }
48
49 case FLA_DOUBLE:
50 {
52 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53
55 m_A,
56 n_A,
59 b_alg );
60
61 break;
62 }
63
64 case FLA_COMPLEX:
65 {
68
70 m_A,
71 n_A,
74 b_alg );
75
76 break;
77 }
78
80 {
83
85 m_A,
86 n_A,
89 b_alg );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96}
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:186
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Apply_G_rf_blc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:157
FLA_Error FLA_Apply_G_rf_bls_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:99

References FLA_Apply_G_rf_blc_var3(), FLA_Apply_G_rf_bld_var3(), FLA_Apply_G_rf_bls_var3(), FLA_Apply_G_rf_blz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_blk_var3b()

FLA_Error FLA_Apply_G_rf_blk_var3b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
42 0,
45 b_alg );
46
47 break;
48 }
49
50 case FLA_DOUBLE:
51 {
53 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
54
56 m_A,
57 n_A,
58 0,
61 b_alg );
62
63 break;
64 }
65
66 case FLA_COMPLEX:
67 {
70
72 m_A,
73 n_A,
74 0,
77 b_alg );
78
79 break;
80 }
81
83 {
86
88 m_A,
89 n_A,
90 0,
93 b_alg );
94
95 break;
96 }
97 }
98
99 return FLA_SUCCESS;
100}
FLA_Error FLA_Apply_G_rf_blc_var3b(int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:167
FLA_Error FLA_Apply_G_rf_blz_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:180
FLA_Error FLA_Apply_G_rf_bls_var3b(int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:103
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:135

References FLA_Apply_G_rf_blc_var3b(), FLA_Apply_G_rf_bld_var3b(), FLA_Apply_G_rf_bls_var3b(), FLA_Apply_G_rf_blz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_blk_var4()

FLA_Error FLA_Apply_G_rf_blk_var4 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var5()

FLA_Error FLA_Apply_G_rf_blk_var5 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var5b()

FLA_Error FLA_Apply_G_rf_blk_var5b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var6()

FLA_Error FLA_Apply_G_rf_blk_var6 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
44 b_alg );
45
46 break;
47 }
48
49 case FLA_DOUBLE:
50 {
52 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53
55 m_A,
56 n_A,
59 b_alg );
60
61 break;
62 }
63
64 case FLA_COMPLEX:
65 {
68
70 m_A,
71 n_A,
74 b_alg );
75
76 break;
77 }
78
80 {
83
85 m_A,
86 n_A,
89 b_alg );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96}
FLA_Error FLA_Apply_G_rf_blc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6.c:155
FLA_Error FLA_Apply_G_rf_bld_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6.c:127
FLA_Error FLA_Apply_G_rf_bls_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6.c:99
FLA_Error FLA_Apply_G_rf_blz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6.c:183

References FLA_Apply_G_rf_blc_var6(), FLA_Apply_G_rf_bld_var6(), FLA_Apply_G_rf_bls_var6(), FLA_Apply_G_rf_blz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_blk_var6b()

FLA_Error FLA_Apply_G_rf_blk_var6b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
42 0,
45 b_alg );
46
47 break;
48 }
49
50 case FLA_DOUBLE:
51 {
53 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
54
56 m_A,
57 n_A,
58 0,
61 b_alg );
62
63 break;
64 }
65
66 case FLA_COMPLEX:
67 {
70
72 m_A,
73 n_A,
74 0,
77 b_alg );
78
79 break;
80 }
81
83 {
86
88 m_A,
89 n_A,
90 0,
93 b_alg );
94
95 break;
96 }
97 }
98
99 return FLA_SUCCESS;
100}
FLA_Error FLA_Apply_G_rf_blc_var6b(int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6b.c:167
FLA_Error FLA_Apply_G_rf_blz_var6b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6b.c:180
FLA_Error FLA_Apply_G_rf_bls_var6b(int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6b.c:103
FLA_Error FLA_Apply_G_rf_bld_var6b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var6b.c:135

References FLA_Apply_G_rf_blc_var6b(), FLA_Apply_G_rf_bld_var6b(), FLA_Apply_G_rf_bls_var6b(), FLA_Apply_G_rf_blz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_blk_var7()

FLA_Error FLA_Apply_G_rf_blk_var7 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var8()

FLA_Error FLA_Apply_G_rf_blk_var8 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var8b()

FLA_Error FLA_Apply_G_rf_blk_var8b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var9()

FLA_Error FLA_Apply_G_rf_blk_var9 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
44 b_alg );
45
46 break;
47 }
48
49 case FLA_DOUBLE:
50 {
52 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53
55 m_A,
56 n_A,
59 b_alg );
60
61 break;
62 }
63
64 case FLA_COMPLEX:
65 {
68
70 m_A,
71 n_A,
74 b_alg );
75
76 break;
77 }
78
80 {
83
85 m_A,
86 n_A,
89 b_alg );
90
91 break;
92 }
93 }
94
95 return FLA_SUCCESS;
96}
FLA_Error FLA_Apply_G_rf_bls_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9.c:99
FLA_Error FLA_Apply_G_rf_blz_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9.c:183
FLA_Error FLA_Apply_G_rf_blc_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9.c:155
FLA_Error FLA_Apply_G_rf_bld_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9.c:127

References FLA_Apply_G_rf_blc_var9(), FLA_Apply_G_rf_bld_var9(), FLA_Apply_G_rf_bls_var9(), FLA_Apply_G_rf_blz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_blk_var9b()

FLA_Error FLA_Apply_G_rf_blk_var9b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14{
15 FLA_Datatype datatype;
16 int k_G, m_A, n_A;
17 int rs_G, cs_G;
18 int rs_A, cs_A;
19
20 datatype = FLA_Obj_datatype( A );
21
22 k_G = FLA_Obj_width( G );
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25
28
31
32 switch ( datatype )
33 {
34 case FLA_FLOAT:
35 {
37 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38
40 m_A,
41 n_A,
42 0,
45 b_alg );
46
47 break;
48 }
49
50 case FLA_DOUBLE:
51 {
53 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
54
56 m_A,
57 n_A,
58 0,
61 b_alg );
62
63 break;
64 }
65
66 case FLA_COMPLEX:
67 {
70
72 m_A,
73 n_A,
74 0,
77 b_alg );
78
79 break;
80 }
81
83 {
86
88 m_A,
89 n_A,
90 0,
93 b_alg );
94
95 break;
96 }
97 }
98
99 return FLA_SUCCESS;
100}
FLA_Error FLA_Apply_G_rf_bld_var9b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9b.c:135
FLA_Error FLA_Apply_G_rf_blc_var9b(int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9b.c:167
FLA_Error FLA_Apply_G_rf_bls_var9b(int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9b.c:103
FLA_Error FLA_Apply_G_rf_blz_var9b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var9b.c:180

References FLA_Apply_G_rf_blc_var9b(), FLA_Apply_G_rf_bld_var9b(), FLA_Apply_G_rf_bls_var9b(), FLA_Apply_G_rf_blz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_bls_var1()

FLA_Error FLA_Apply_G_rf_bls_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105{
106 int i;
107 int b = 0;
108
109 for ( i = 0; i < m_A; i += b )
110 {
111 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112 int m_ahead = max( 0, m_A - i );
113
114 b = min( b_alg, m_ahead );
115
116 //FLA_Apply_G_rf_ops_var1( k_G,
117 FLA_Apply_G_rf_ass_var1( k_G,
118 b,
119 n_A,
120 buff_G, rs_G, cs_G,
121 A1, rs_A, cs_A );
122 }
123
124 return FLA_SUCCESS;
125}

References FLA_Apply_G_rf_ass_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().

◆ FLA_Apply_G_rf_bls_var2()

FLA_Error FLA_Apply_G_rf_bls_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105{
106 int i;
107 int b = 0;
108
109 for ( i = 0; i < m_A; i += b )
110 {
111 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112 int m_ahead = max( 0, m_A - i );
113
114 b = min( b_alg, m_ahead );
115
116 //FLA_Apply_G_rf_ops_var2( k_G,
117 FLA_Apply_G_rf_ass_var2( k_G,
118 b,
119 n_A,
120 buff_G, rs_G, cs_G,
121 A1, rs_A, cs_A );
122 }
123
124 return FLA_SUCCESS;
125}
FLA_Error FLA_Apply_G_rf_ass_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:113

References FLA_Apply_G_rf_ass_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_bls_var3()

FLA_Error FLA_Apply_G_rf_bls_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105{
106 int i;
107 int b = 0;
108
109 for ( i = 0; i < m_A; i += b )
110 {
111 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112 int m_ahead = max( 0, m_A - i );
113
114 b = min( b_alg, m_ahead );
115
116 // ass_var3 does not support arbitrary strides: TODO
117 FLA_Apply_G_rf_ops_var3( k_G,
118 //FLA_Apply_G_rf_ass_var3( k_G,
119 b,
120 n_A,
121 buff_G, rs_G, cs_G,
122 A1, rs_A, cs_A );
123 }
124
125 return FLA_SUCCESS;
126}
FLA_Error FLA_Apply_G_rf_ops_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:112

References FLA_Apply_G_rf_ops_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_ops_var1(), and FLA_Bsvd_v_ops_var1().

◆ FLA_Apply_G_rf_bls_var3b()

FLA_Error FLA_Apply_G_rf_bls_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
110{
111 int i;
112 int b = 0;
113
114 for ( i = 0; i < m_A; i += b )
115 {
116 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
117 int m_behind = i;
118 int m_ahead = max( 0, m_A - i );
119
120 b = min( b_alg, m_ahead );
121
122 //FLA_Apply_G_rf_ops_var3b( k_G,
123 FLA_Apply_G_rf_ass_var3b( k_G,
124 b,
125 n_A,
126 i_k,
127 m_behind,
128 buff_G, rs_G, cs_G,
129 A1, rs_A, cs_A );
130 }
131
132 return FLA_SUCCESS;
133}
FLA_Error FLA_Apply_G_rf_ass_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:118

References FLA_Apply_G_rf_ass_var3b(), and i.

Referenced by FLA_Apply_G_rf_blk_var3b().

◆ FLA_Apply_G_rf_bls_var4()

FLA_Error FLA_Apply_G_rf_bls_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var5()

FLA_Error FLA_Apply_G_rf_bls_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var5b()

FLA_Error FLA_Apply_G_rf_bls_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var6()

FLA_Error FLA_Apply_G_rf_bls_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105{
106 int i;
107 int b = 0;
108
109 for ( i = 0; i < m_A; i += b )
110 {
111 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112 int m_ahead = max( 0, m_A - i );
113
114 b = min( b_alg, m_ahead );
115
116 //FLA_Apply_G_rf_ops_var6( k_G,
117 FLA_Apply_G_rf_ass_var6( k_G,
118 b,
119 n_A,
120 buff_G, rs_G, cs_G,
121 A1, rs_A, cs_A );
122 }
123
124 return FLA_SUCCESS;
125}
FLA_Error FLA_Apply_G_rf_ass_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:113

References FLA_Apply_G_rf_ass_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_bls_var6b()

FLA_Error FLA_Apply_G_rf_bls_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
110{
111 int i;
112 int b = 0;
113
114 for ( i = 0; i < m_A; i += b )
115 {
116 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
117 int m_behind = i;
118 int m_ahead = max( 0, m_A - i );
119
120 b = min( b_alg, m_ahead );
121
122 //FLA_Apply_G_rf_ops_var6b( k_G,
123 FLA_Apply_G_rf_ass_var6b( k_G,
124 b,
125 n_A,
126 i_k,
127 m_behind,
128 buff_G, rs_G, cs_G,
129 A1, rs_A, cs_A );
130 }
131
132 return FLA_SUCCESS;
133}
FLA_Error FLA_Apply_G_rf_ass_var6b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6b.c:118

References FLA_Apply_G_rf_ass_var6b(), and i.

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_bls_var7()

FLA_Error FLA_Apply_G_rf_bls_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var8()

FLA_Error FLA_Apply_G_rf_bls_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var8b()

FLA_Error FLA_Apply_G_rf_bls_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var9()

FLA_Error FLA_Apply_G_rf_bls_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105{
106 int i;
107 int b = 0;
108
109 for ( i = 0; i < m_A; i += b )
110 {
111 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112 int m_ahead = max( 0, m_A - i );
113
114 b = min( b_alg, m_ahead );
115
116 //FLA_Apply_G_rf_ops_var9( k_G,
117 FLA_Apply_G_rf_ass_var9( k_G,
118 b,
119 n_A,
120 buff_G, rs_G, cs_G,
121 A1, rs_A, cs_A );
122 }
123
124 return FLA_SUCCESS;
125}
FLA_Error FLA_Apply_G_rf_ass_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:113

References FLA_Apply_G_rf_ass_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_bls_var9b()

FLA_Error FLA_Apply_G_rf_bls_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
110{
111 int i;
112 int b = 0;
113
114 for ( i = 0; i < m_A; i += b )
115 {
116 float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
117 int m_behind = i;
118 int m_ahead = max( 0, m_A - i );
119
120 b = min( b_alg, m_ahead );
121
122 //FLA_Apply_G_rf_ops_var9b( k_G,
123 FLA_Apply_G_rf_ass_var9b( k_G,
124 b,
125 n_A,
126 i_k,
127 m_behind,
128 buff_G, rs_G, cs_G,
129 A1, rs_A, cs_A );
130 }
131
132 return FLA_SUCCESS;
133}
FLA_Error FLA_Apply_G_rf_ass_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:118

References FLA_Apply_G_rf_ass_var9b(), and i.

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_blz_var1()

FLA_Error FLA_Apply_G_rf_blz_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189{
190 int i;
191 int b = 0;
192
193 for ( i = 0; i < m_A; i += b )
194 {
195 dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196 int m_ahead = max( 0, m_A - i );
197
198 b = min( b_alg, m_ahead );
199
200 //FLA_Apply_G_rf_opz_var1( k_G,
201 FLA_Apply_G_rf_asz_var1( k_G,
202 b,
203 n_A,
204 buff_G, rs_G, cs_G,
205 A1, rs_A, cs_A );
206 }
207
208 return FLA_SUCCESS;
209}

References FLA_Apply_G_rf_asz_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().

◆ FLA_Apply_G_rf_blz_var2()

FLA_Error FLA_Apply_G_rf_blz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189{
190 int i;
191 int b = 0;
192
193 for ( i = 0; i < m_A; i += b )
194 {
195 dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196 int m_ahead = max( 0, m_A - i );
197
198 b = min( b_alg, m_ahead );
199
200 //FLA_Apply_G_rf_opz_var2( k_G,
201 FLA_Apply_G_rf_asz_var2( k_G,
202 b,
203 n_A,
204 buff_G, rs_G, cs_G,
205 A1, rs_A, cs_A );
206 }
207
208 return FLA_SUCCESS;
209}
FLA_Error FLA_Apply_G_rf_asz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var2.c:452

References FLA_Apply_G_rf_asz_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_blz_var3()

FLA_Error FLA_Apply_G_rf_blz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
192{
193 int i;
194 int b = 0;
195
196 for ( i = 0; i < m_A; i += b )
197 {
198 dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
199 int m_ahead = max( 0, m_A - i );
200
201 b = min( b_alg, m_ahead );
202
203 // ass_var3 does not support arbitrary strides: TODO
204 FLA_Apply_G_rf_opz_var3( k_G,
205 //FLA_Apply_G_rf_asz_var3( k_G,
206 b,
207 n_A,
208 buff_G, rs_G, cs_G,
209 A1, rs_A, cs_A );
210 }
211
212 return FLA_SUCCESS;
213}
FLA_Error FLA_Apply_G_rf_opz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:1471

References FLA_Apply_G_rf_opz_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opz_var1(), and FLA_Tevd_v_opz_var1().

◆ FLA_Apply_G_rf_blz_var3b()

FLA_Error FLA_Apply_G_rf_blz_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
187{
189
190 return FLA_SUCCESS;
191}

References i.

Referenced by FLA_Apply_G_rf_blk_var3b().

◆ FLA_Apply_G_rf_blz_var4()

FLA_Error FLA_Apply_G_rf_blz_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var5()

FLA_Error FLA_Apply_G_rf_blz_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var5b()

FLA_Error FLA_Apply_G_rf_blz_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var6()

FLA_Error FLA_Apply_G_rf_blz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189{
190 int i;
191 int b = 0;
192
193 for ( i = 0; i < m_A; i += b )
194 {
195 dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196 int m_ahead = max( 0, m_A - i );
197
198 b = min( b_alg, m_ahead );
199
200 //FLA_Apply_G_rf_opz_var6( k_G,
201 FLA_Apply_G_rf_asz_var6( k_G,
202 b,
203 n_A,
204 buff_G, rs_G, cs_G,
205 A1, rs_A, cs_A );
206 }
207
208 return FLA_SUCCESS;
209}
FLA_Error FLA_Apply_G_rf_asz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:956

References FLA_Apply_G_rf_asz_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_blz_var6b()

FLA_Error FLA_Apply_G_rf_blz_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
187{
189
190 return FLA_SUCCESS;
191}

References i.

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_blz_var7()

FLA_Error FLA_Apply_G_rf_blz_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var8()

FLA_Error FLA_Apply_G_rf_blz_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var8b()

FLA_Error FLA_Apply_G_rf_blz_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var9()

FLA_Error FLA_Apply_G_rf_blz_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189{
190 int i;
191 int b = 0;
192
193 for ( i = 0; i < m_A; i += b )
194 {
195 dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196 int m_ahead = max( 0, m_A - i );
197
198 b = min( b_alg, m_ahead );
199
200 //FLA_Apply_G_rf_opz_var9( k_G,
201 FLA_Apply_G_rf_asz_var9( k_G,
202 b,
203 n_A,
204 buff_G, rs_G, cs_G,
205 A1, rs_A, cs_A );
206 }
207
208 return FLA_SUCCESS;
209}
FLA_Error FLA_Apply_G_rf_asz_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9.c:914

References FLA_Apply_G_rf_asz_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_blz_var9b()

FLA_Error FLA_Apply_G_rf_blz_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
187{
189
190 return FLA_SUCCESS;
191}

References i.

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_opc_var1()

FLA_Error FLA_Apply_G_rf_opc_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
220{
221 float one = bl1_s1();
222 float zero = bl1_s0();
223 int nG_app = n_A - 1;
224 int l, j;
225 float gamma;
226 float sigma;
227 scomplex* a1;
228 scomplex* a2;
229 scomplex* g1;
230 scomplex* g11;
231
232 g1 = buff_G;
233
234 for ( l = 0; l < k_G; ++l )
235 {
236 a1 = buff_A;
237 a2 = buff_A + cs_A;
238 g11 = g1;
239
240 for ( j = 0; j < nG_app; ++j )
241 {
242 gamma = g11->real;
243 sigma = g11->imag;
244
245 // Skip the current iteration if the rotation is identity.
246 if ( gamma != one || sigma != zero )
247 {
249 &gamma,
250 &sigma,
251 a1, rs_A,
252 a2, rs_A );
253 }
254
255 a1 += cs_A;
256 a2 += cs_A;
257 g11 += rs_G;
258 }
259
260 g1 += cs_G;
261 }
262
263 return FLA_SUCCESS;
264}

References bl1_s0(), bl1_s1(), i, and scomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opc_var9(), and FLA_Apply_G_rf_opt_var1().

◆ FLA_Apply_G_rf_opc_var2()

FLA_Error FLA_Apply_G_rf_opc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)
343{
344 float one = bl1_s1();
345 float zero = bl1_s0();
346 float gamma;
347 float sigma;
348 scomplex* a1;
349 scomplex* a2;
350 scomplex* g11;
351 int j, g, k;
352 int nG, nG_app;
353 int k_minus_1;
354
355 k_minus_1 = k_G - 1;
356 nG = n_A - 1;
357
358 // Use the simple variant for nG < (k - 1) or k == 1.
359 if ( nG < k_minus_1 || k_G == 1 )
360 {
361 FLA_Apply_G_rf_opc_var1( k_G,
362 m_A,
363 n_A,
364 buff_G, rs_G, cs_G,
365 buff_A, rs_A, cs_A );
366 return FLA_SUCCESS;
367 }
368
369
370 // Start-up phase.
371
372 for ( j = 0; j < k_minus_1; ++j )
373 {
374 nG_app = j + 1;
375
376 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
377 {
378 g11 = buff_G + (g )*rs_G + (k )*cs_G;
379 a1 = buff_A + (g )*cs_A;
380 a2 = buff_A + (g + 1)*cs_A;
381
382 gamma = g11->real;
383 sigma = g11->imag;
384
385 // Skip the current iteration if the rotation is identity.
386 if ( gamma == one && sigma == zero ) continue;
387
389 &gamma,
390 &sigma,
391 a1, rs_A,
392 a2, rs_A );
393 }
394 }
395
396 // Pipeline stage
397
398 for ( j = k_minus_1; j < nG; ++j )
399 {
400 nG_app = k_G;
401
402 for ( k = 0, g = j; k < nG_app; ++k, --g )
403 {
404 g11 = buff_G + (g )*rs_G + (k )*cs_G;
405 a1 = buff_A + (g )*cs_A;
406 a2 = buff_A + (g + 1)*cs_A;
407
408 gamma = g11->real;
409 sigma = g11->imag;
410
411 // Skip the current iteration if the rotation is identity.
412 if ( gamma == one && sigma == zero ) continue;
413
415 &gamma,
416 &sigma,
417 a1, rs_A,
418 a2, rs_A );
419 }
420 }
421
422 // Shutdown stage
423
424 for ( j = nG - k_minus_1; j < nG; ++j )
425 {
426 nG_app = nG - j;
427
428 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
429 {
430 g11 = buff_G + (g )*rs_G + (k )*cs_G;
431 a1 = buff_A + (g )*cs_A;
432 a2 = buff_A + (g + 1)*cs_A;
433
434 gamma = g11->real;
435 sigma = g11->imag;
436
437 // Skip the current iteration if the rotation is identity.
438 if ( gamma == one && sigma == zero ) continue;
439
441 &gamma,
442 &sigma,
443 a1, rs_A,
444 a2, rs_A );
445 }
446 }
447
448 return FLA_SUCCESS;
449}
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:215

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), and i.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_opc_var3()

FLA_Error FLA_Apply_G_rf_opc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)
1023{
1024 float one = bl1_s1();
1025 float zero = bl1_s0();
1026 float gamma23_k1;
1027 float sigma23_k1;
1028 float gamma34_k1;
1029 float sigma34_k1;
1030 float gamma12_k2;
1031 float sigma12_k2;
1032 float gamma23_k2;
1033 float sigma23_k2;
1034 scomplex* a1;
1035 scomplex* a2;
1036 scomplex* a3;
1037 scomplex* a4;
1042 int i, j, g, k;
1043 int nG, nG_app;
1044 int n_iter;
1045 int n_left;
1046 int k_minus_1;
1047 int n_fuse;
1048 int k_fuse;
1051 int has_ident;
1052
1053 k_minus_1 = k_G - 1;
1054 nG = n_A - 1;
1055 n_fuse = 2;
1056 k_fuse = 2;
1057
1058 // Use the simple variant for nG < 2(k - 1) or k == 1.
1059 if ( nG < 2*k_minus_1 || k_G == 1 )
1060 {
1061 FLA_Apply_G_rf_opc_var1( k_G,
1062 m_A,
1063 n_A,
1064 buff_G, rs_G, cs_G,
1065 buff_A, rs_A, cs_A );
1066 return FLA_SUCCESS;
1067 }
1068
1069
1070 // Start-up phase.
1071
1072 for ( j = -1; j < k_minus_1; j += n_fuse )
1073 {
1074 nG_app = j + 2;
1075 n_iter = nG_app / k_fuse;
1076 n_left = 1;
1077
1078 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1079 {
1080 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1081 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1082 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1083 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1084 a1 = buff_A + (g - 1)*cs_A;
1085 a2 = buff_A + (g )*cs_A;
1086 a3 = buff_A + (g + 1)*cs_A;
1087 a4 = buff_A + (g + 2)*cs_A;
1088
1089 gamma23_k1 = g23_k1->real;
1090 sigma23_k1 = g23_k1->imag;
1091 gamma34_k1 = g34_k1->real;
1092 sigma34_k1 = g34_k1->imag;
1093 gamma12_k2 = g12_k2->real;
1094 sigma12_k2 = g12_k2->imag;
1095 gamma23_k2 = g23_k2->real;
1096 sigma23_k2 = g23_k2->imag;
1097
1104
1105 if ( has_ident )
1106 {
1107 // Apply to pairs of columns as needed.
1108
1109 if ( !is_ident23_k1 )
1111 &gamma23_k1,
1112 &sigma23_k1,
1113 a2, rs_A,
1114 a3, rs_A );
1115
1116 if ( !is_ident34_k1 )
1118 &gamma34_k1,
1119 &sigma34_k1,
1120 a3, rs_A,
1121 a4, rs_A );
1122
1123 if ( !is_ident12_k2 )
1125 &gamma12_k2,
1126 &sigma12_k2,
1127 a1, rs_A,
1128 a2, rs_A );
1129
1130 if ( !is_ident23_k2 )
1132 &gamma23_k2,
1133 &sigma23_k2,
1134 a2, rs_A,
1135 a3, rs_A );
1136 }
1137 else
1138 {
1139 // Apply to all four columns.
1140
1142 &gamma23_k1,
1143 &sigma23_k1,
1144 &gamma34_k1,
1145 &sigma34_k1,
1146 &gamma12_k2,
1147 &sigma12_k2,
1148 &gamma23_k2,
1149 &sigma23_k2,
1150 a1, rs_A,
1151 a2, rs_A,
1152 a3, rs_A,
1153 a4, rs_A );
1154 }
1155 }
1156
1157 if ( n_left == 1 )
1158 {
1159 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1160 a3 = buff_A + (g + 1)*cs_A;
1161 a4 = buff_A + (g + 2)*cs_A;
1162
1163 gamma34_k1 = g34_k1->real;
1164 sigma34_k1 = g34_k1->imag;
1165
1167
1168 if ( !is_ident34_k1 )
1170 &gamma34_k1,
1171 &sigma34_k1,
1172 a3, rs_A,
1173 a4, rs_A );
1174 }
1175 }
1176
1177 // Pipeline stage
1178
1179 for ( ; j < nG - 1; j += n_fuse )
1180 {
1181 nG_app = k_G;
1182 n_iter = nG_app / k_fuse;
1183 n_left = nG_app % k_fuse;
1184
1185 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1186 {
1187 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1188 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1189 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1190 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1191 a1 = buff_A + (g - 1)*cs_A;
1192 a2 = buff_A + (g )*cs_A;
1193 a3 = buff_A + (g + 1)*cs_A;
1194 a4 = buff_A + (g + 2)*cs_A;
1195
1196 gamma23_k1 = g23_k1->real;
1197 sigma23_k1 = g23_k1->imag;
1198 gamma34_k1 = g34_k1->real;
1199 sigma34_k1 = g34_k1->imag;
1200 gamma12_k2 = g12_k2->real;
1201 sigma12_k2 = g12_k2->imag;
1202 gamma23_k2 = g23_k2->real;
1203 sigma23_k2 = g23_k2->imag;
1204
1211
1212 if ( has_ident )
1213 {
1214 // Apply to pairs of columns as needed.
1215
1216 if ( !is_ident23_k1 )
1218 &gamma23_k1,
1219 &sigma23_k1,
1220 a2, rs_A,
1221 a3, rs_A );
1222
1223 if ( !is_ident34_k1 )
1225 &gamma34_k1,
1226 &sigma34_k1,
1227 a3, rs_A,
1228 a4, rs_A );
1229
1230 if ( !is_ident12_k2 )
1232 &gamma12_k2,
1233 &sigma12_k2,
1234 a1, rs_A,
1235 a2, rs_A );
1236
1237 if ( !is_ident23_k2 )
1239 &gamma23_k2,
1240 &sigma23_k2,
1241 a2, rs_A,
1242 a3, rs_A );
1243 }
1244 else
1245 {
1246 // Apply to all four columns.
1247
1249 &gamma23_k1,
1250 &sigma23_k1,
1251 &gamma34_k1,
1252 &sigma34_k1,
1253 &gamma12_k2,
1254 &sigma12_k2,
1255 &gamma23_k2,
1256 &sigma23_k2,
1257 a1, rs_A,
1258 a2, rs_A,
1259 a3, rs_A,
1260 a4, rs_A );
1261 }
1262 }
1263
1264 if ( n_left == 1 )
1265 {
1266 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1267 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1268 a2 = buff_A + (g )*cs_A;
1269 a3 = buff_A + (g + 1)*cs_A;
1270 a4 = buff_A + (g + 2)*cs_A;
1271
1272 gamma23_k1 = g23_k1->real;
1273 sigma23_k1 = g23_k1->imag;
1274 gamma34_k1 = g34_k1->real;
1275 sigma34_k1 = g34_k1->imag;
1276
1279
1280 if ( !is_ident23_k1 && is_ident34_k1 )
1281 {
1283 &gamma23_k1,
1284 &sigma23_k1,
1285 a2, rs_A,
1286 a3, rs_A );
1287 }
1288 else if ( is_ident23_k1 && !is_ident34_k1 )
1289 {
1291 &gamma34_k1,
1292 &sigma34_k1,
1293 a3, rs_A,
1294 a4, rs_A );
1295 }
1296 else
1297 {
1299 &gamma23_k1,
1300 &sigma23_k1,
1301 &gamma34_k1,
1302 &sigma34_k1,
1303 a2, rs_A,
1304 a3, rs_A,
1305 a4, rs_A );
1306 }
1307 }
1308 }
1309
1310 // Shutdown stage
1311
1312 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1313 {
1314 g = nG - 1;
1315 k = j;
1316
1317 //n_left = 1;
1318 //if ( n_left == 1 )
1319 {
1320 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1321 a2 = buff_A + (g )*cs_A;
1322 a3 = buff_A + (g + 1)*cs_A;
1323
1324 gamma23_k1 = g23_k1->real;
1325 sigma23_k1 = g23_k1->imag;
1326
1328
1329 if ( !is_ident23_k1 )
1331 &gamma23_k1,
1332 &sigma23_k1,
1333 a2, rs_A,
1334 a3, rs_A );
1335 ++k;
1336 --g;
1337 }
1338
1339 nG_app = k_minus_1 - j;
1340 n_iter = nG_app / k_fuse;
1341 n_left = nG_app % k_fuse;
1342
1343 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1344 {
1345 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1346 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1347 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1348 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1349 a1 = buff_A + (g - 1)*cs_A;
1350 a2 = buff_A + (g )*cs_A;
1351 a3 = buff_A + (g + 1)*cs_A;
1352 a4 = buff_A + (g + 2)*cs_A;
1353
1354 gamma23_k1 = g23_k1->real;
1355 sigma23_k1 = g23_k1->imag;
1356 gamma34_k1 = g34_k1->real;
1357 sigma34_k1 = g34_k1->imag;
1358 gamma12_k2 = g12_k2->real;
1359 sigma12_k2 = g12_k2->imag;
1360 gamma23_k2 = g23_k2->real;
1361 sigma23_k2 = g23_k2->imag;
1362
1369
1370 if ( has_ident )
1371 {
1372 // Apply to pairs of columns as needed.
1373
1374 if ( !is_ident23_k1 )
1376 &gamma23_k1,
1377 &sigma23_k1,
1378 a2, rs_A,
1379 a3, rs_A );
1380
1381 if ( !is_ident34_k1 )
1383 &gamma34_k1,
1384 &sigma34_k1,
1385 a3, rs_A,
1386 a4, rs_A );
1387
1388 if ( !is_ident12_k2 )
1390 &gamma12_k2,
1391 &sigma12_k2,
1392 a1, rs_A,
1393 a2, rs_A );
1394
1395 if ( !is_ident23_k2 )
1397 &gamma23_k2,
1398 &sigma23_k2,
1399 a2, rs_A,
1400 a3, rs_A );
1401 }
1402 else
1403 {
1404 // Apply to all four columns.
1405
1407 &gamma23_k1,
1408 &sigma23_k1,
1409 &gamma34_k1,
1410 &sigma34_k1,
1411 &gamma12_k2,
1412 &sigma12_k2,
1413 &gamma23_k2,
1414 &sigma23_k2,
1415 a1, rs_A,
1416 a2, rs_A,
1417 a3, rs_A,
1418 a4, rs_A );
1419 }
1420 }
1421
1422 if ( n_left == 1 )
1423 {
1424 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1425 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1426 a2 = buff_A + (g )*cs_A;
1427 a3 = buff_A + (g + 1)*cs_A;
1428 a4 = buff_A + (g + 2)*cs_A;
1429
1430 gamma23_k1 = g23_k1->real;
1431 sigma23_k1 = g23_k1->imag;
1432 gamma34_k1 = g34_k1->real;
1433 sigma34_k1 = g34_k1->imag;
1434
1437
1438 if ( !is_ident23_k1 && is_ident34_k1 )
1439 {
1441 &gamma23_k1,
1442 &sigma23_k1,
1443 a2, rs_A,
1444 a3, rs_A );
1445 }
1446 else if ( is_ident23_k1 && !is_ident34_k1 )
1447 {
1449 &gamma34_k1,
1450 &sigma34_k1,
1451 a3, rs_A,
1452 a4, rs_A );
1453 }
1454 else
1455 {
1457 &gamma23_k1,
1458 &sigma23_k1,
1459 &gamma34_k1,
1460 &sigma34_k1,
1461 a2, rs_A,
1462 a3, rs_A,
1463 a4, rs_A );
1464 }
1465 }
1466 }
1467
1468 return FLA_SUCCESS;
1469}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_blc_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_opc_var4()

FLA_Error FLA_Apply_G_rf_opc_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var5()

FLA_Error FLA_Apply_G_rf_opc_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var6()

FLA_Error FLA_Apply_G_rf_opc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)
679{
680 float one = bl1_s1();
681 float zero = bl1_s0();
682 float gamma12;
683 float sigma12;
684 float gamma23;
685 float sigma23;
686 scomplex* a1;
687 scomplex* a2;
688 scomplex* a3;
689 scomplex* g12;
690 scomplex* g23;
691 int i, j, g, k;
692 int nG, nG_app;
693 int n_iter;
694 int n_left;
695 int k_minus_1;
696 int n_fuse;
698
699 k_minus_1 = k_G - 1;
700 nG = n_A - 1;
701 n_fuse = 2;
702
703 // Use the simple variant for nG < (k - 1) or k == 1.
704 if ( nG < k_minus_1 || k_G == 1 )
705 {
706 FLA_Apply_G_rf_opc_var1( k_G,
707 m_A,
708 n_A,
709 buff_G, rs_G, cs_G,
710 buff_A, rs_A, cs_A );
711 return FLA_SUCCESS;
712 }
713
714
715 // Start-up phase.
716
717 for ( j = 0; j < k_minus_1; ++j )
718 {
719 nG_app = j + 1;
720 n_iter = nG_app / n_fuse;
721 n_left = nG_app % n_fuse;
722
723 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
724 {
725 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
726 g23 = buff_G + (g )*rs_G + (k )*cs_G;
727 a1 = buff_A + (g - 1)*cs_A;
728 a2 = buff_A + (g )*cs_A;
729 a3 = buff_A + (g + 1)*cs_A;
730
731 gamma12 = g12->real;
732 sigma12 = g12->imag;
733 gamma23 = g23->real;
734 sigma23 = g23->imag;
735
736 is_ident12 = ( gamma12 == one && sigma12 == zero );
737 is_ident23 = ( gamma23 == one && sigma23 == zero );
738
739 if ( !is_ident12 && is_ident23 )
740 {
741 // Apply only to columns 1 and 2.
742
744 &gamma12,
745 &sigma12,
746 a1, rs_A,
747 a2, rs_A );
748 }
749 else if ( is_ident12 && !is_ident23 )
750 {
751 // Apply only to columns 2 and 3.
752
754 &gamma23,
755 &sigma23,
756 a2, rs_A,
757 a3, rs_A );
758 }
759 else if ( !is_ident12 && !is_ident23 )
760 {
761 // Apply to all three columns.
762
764 &gamma12,
765 &sigma12,
766 &gamma23,
767 &sigma23,
768 a1, rs_A,
769 a2, rs_A,
770 a3, rs_A );
771 }
772 }
773 //for ( k = 0; k < n_left; k += 1, g -= 1 )
774 if ( n_left == 1 )
775 {
776 g23 = buff_G + (g )*rs_G + (k )*cs_G;
777 a2 = buff_A + (g )*cs_A;
778 a3 = buff_A + (g + 1)*cs_A;
779
780 gamma23 = g23->real;
781 sigma23 = g23->imag;
782
783 is_ident23 = ( gamma23 == one && sigma23 == zero );
784
785 if ( !is_ident23 )
787 &gamma23,
788 &sigma23,
789 a2, rs_A,
790 a3, rs_A );
791 }
792 }
793
794 // Pipeline stage
795
796 for ( j = k_minus_1; j < nG; ++j )
797 {
798 nG_app = k_G;
799 n_iter = nG_app / n_fuse;
800 n_left = nG_app % n_fuse;
801
802 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
803 {
804 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
805 g23 = buff_G + (g )*rs_G + (k )*cs_G;
806 a1 = buff_A + (g - 1)*cs_A;
807 a2 = buff_A + (g )*cs_A;
808 a3 = buff_A + (g + 1)*cs_A;
809
810 gamma12 = g12->real;
811 sigma12 = g12->imag;
812 gamma23 = g23->real;
813 sigma23 = g23->imag;
814
815 is_ident12 = ( gamma12 == one && sigma12 == zero );
816 is_ident23 = ( gamma23 == one && sigma23 == zero );
817
818 if ( !is_ident12 && is_ident23 )
819 {
820 // Apply only to columns 1 and 2.
821
823 &gamma12,
824 &sigma12,
825 a1, rs_A,
826 a2, rs_A );
827 }
828 else if ( is_ident12 && !is_ident23 )
829 {
830 // Apply only to columns 2 and 3.
831
833 &gamma23,
834 &sigma23,
835 a2, rs_A,
836 a3, rs_A );
837 }
838 else if ( !is_ident12 && !is_ident23 )
839 {
840 // Apply to all three columns.
841
843 &gamma12,
844 &sigma12,
845 &gamma23,
846 &sigma23,
847 a1, rs_A,
848 a2, rs_A,
849 a3, rs_A );
850 }
851 }
852 //for ( k = 0; k < n_left; k += 1, g -= 1 )
853 if ( n_left == 1 )
854 {
855 g23 = buff_G + (g )*rs_G + (k )*cs_G;
856 a2 = buff_A + (g )*cs_A;
857 a3 = buff_A + (g + 1)*cs_A;
858
859 gamma23 = g23->real;
860 sigma23 = g23->imag;
861
862 is_ident23 = ( gamma23 == one && sigma23 == zero );
863
864 if ( !is_ident23 )
866 &gamma23,
867 &sigma23,
868 a2, rs_A,
869 a3, rs_A );
870 }
871 }
872
873 // Shutdown stage
874
875 for ( j = 1; j < k_G; ++j )
876 {
877 nG_app = k_G - j;
878 n_iter = nG_app / n_fuse;
879 n_left = nG_app % n_fuse;
880
881 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
882 {
883 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
884 g23 = buff_G + (g )*rs_G + (k )*cs_G;
885 a1 = buff_A + (g - 1)*cs_A;
886 a2 = buff_A + (g )*cs_A;
887 a3 = buff_A + (g + 1)*cs_A;
888
889 gamma12 = g12->real;
890 sigma12 = g12->imag;
891 gamma23 = g23->real;
892 sigma23 = g23->imag;
893
894 is_ident12 = ( gamma12 == one && sigma12 == zero );
895 is_ident23 = ( gamma23 == one && sigma23 == zero );
896
897 if ( !is_ident12 && is_ident23 )
898 {
899 // Apply only to columns 1 and 2.
900
902 &gamma12,
903 &sigma12,
904 a1, rs_A,
905 a2, rs_A );
906 }
907 else if ( is_ident12 && !is_ident23 )
908 {
909 // Apply only to columns 2 and 3.
910
912 &gamma23,
913 &sigma23,
914 a2, rs_A,
915 a3, rs_A );
916 }
917 else if ( !is_ident12 && !is_ident23 )
918 {
919 // Apply to all three columns.
920
922 &gamma12,
923 &sigma12,
924 &gamma23,
925 &sigma23,
926 a1, rs_A,
927 a2, rs_A,
928 a3, rs_A );
929 }
930 }
931 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
932 if ( n_left == 1 )
933 {
934 g23 = buff_G + (g )*rs_G + (k )*cs_G;
935 a2 = buff_A + (g )*cs_A;
936 a3 = buff_A + (g + 1)*cs_A;
937
938 gamma23 = g23->real;
939 sigma23 = g23->imag;
940
941 is_ident23 = ( gamma23 == one && sigma23 == zero );
942
943 if ( !is_ident23 )
945 &gamma23,
946 &sigma23,
947 a2, rs_A,
948 a3, rs_A );
949 }
950 }
951
952 return FLA_SUCCESS;
953}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opc_var7()

FLA_Error FLA_Apply_G_rf_opc_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var8()

FLA_Error FLA_Apply_G_rf_opc_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var9()

FLA_Error FLA_Apply_G_rf_opc_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A 
)
652{
653 float one = bl1_s1();
654 float zero = bl1_s0();
655 float gamma12;
656 float sigma12;
657 float gamma23;
658 float sigma23;
659 scomplex* a1;
660 scomplex* a2;
661 scomplex* a3;
662 scomplex* g12;
663 scomplex* g23;
664 int i, j, g, k;
665 int nG, nG_app;
666 int n_iter;
667 int n_left;
668 int k_minus_1;
669 int n_fuse;
671
672 k_minus_1 = k_G - 1;
673 nG = n_A - 1;
674 n_fuse = 2;
675
676 // Use the simple variant for nG < 2(k - 1) or k == 1.
677 if ( nG < 2*k_minus_1 || k_G == 1 )
678 {
679 FLA_Apply_G_rf_opc_var1( k_G,
680 m_A,
681 n_A,
682 buff_G, rs_G, cs_G,
683 buff_A, rs_A, cs_A );
684 return FLA_SUCCESS;
685 }
686
687
688 // Start-up phase.
689
690 for ( j = -1; j < k_minus_1; j += n_fuse )
691 {
692 nG_app = j + 1;
693 n_iter = nG_app;
694 n_left = 1;
695
696 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
697 {
698 g12 = buff_G + (g )*rs_G + (k )*cs_G;
699 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
700 a1 = buff_A + (g )*cs_A;
701 a2 = buff_A + (g + 1)*cs_A;
702 a3 = buff_A + (g + 2)*cs_A;
703
704 gamma12 = g12->real;
705 sigma12 = g12->imag;
706 gamma23 = g23->real;
707 sigma23 = g23->imag;
708
709 is_ident12 = ( gamma12 == one && sigma12 == zero );
710 is_ident23 = ( gamma23 == one && sigma23 == zero );
711
712 if ( !is_ident12 && is_ident23 )
713 {
714 // Apply only to columns 1 and 2.
715
717 &gamma12,
718 &sigma12,
719 a1, rs_A,
720 a2, rs_A );
721 }
722 else if ( is_ident12 && !is_ident23 )
723 {
724 // Apply only to columns 2 and 3.
725
727 &gamma23,
728 &sigma23,
729 a2, rs_A,
730 a3, rs_A );
731 }
732 else if ( !is_ident12 && !is_ident23 )
733 {
734 // Apply to all three columns.
735
737 &gamma12,
738 &sigma12,
739 &gamma23,
740 &sigma23,
741 a1, rs_A,
742 a2, rs_A,
743 a3, rs_A );
744 }
745 }
746
747 if ( n_left == 1 )
748 {
749 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
750 a2 = buff_A + (g + 1)*cs_A;
751 a3 = buff_A + (g + 2)*cs_A;
752
753 gamma23 = g23->real;
754 sigma23 = g23->imag;
755
756 is_ident23 = ( gamma23 == one && sigma23 == zero );
757
758 if ( !is_ident23 )
760 &gamma23,
761 &sigma23,
762 a2, rs_A,
763 a3, rs_A );
764 }
765 }
766
767 // Pipeline stage
768
769 for ( ; j < nG - 1; j += n_fuse )
770 {
771 nG_app = k_G;
772 n_iter = nG_app;
773 n_left = 0;
774
775 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
776 {
777 g12 = buff_G + (g )*rs_G + (k )*cs_G;
778 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
779 a1 = buff_A + (g )*cs_A;
780 a2 = buff_A + (g + 1)*cs_A;
781 a3 = buff_A + (g + 2)*cs_A;
782
783 gamma12 = g12->real;
784 sigma12 = g12->imag;
785 gamma23 = g23->real;
786 sigma23 = g23->imag;
787
788 is_ident12 = ( gamma12 == one && sigma12 == zero );
789 is_ident23 = ( gamma23 == one && sigma23 == zero );
790
791 if ( !is_ident12 && is_ident23 )
792 {
793 // Apply only to columns 1 and 2.
794
796 &gamma12,
797 &sigma12,
798 a1, rs_A,
799 a2, rs_A );
800 }
801 else if ( is_ident12 && !is_ident23 )
802 {
803 // Apply only to columns 2 and 3.
804
806 &gamma23,
807 &sigma23,
808 a2, rs_A,
809 a3, rs_A );
810 }
811 else if ( !is_ident12 && !is_ident23 )
812 {
813 // Apply to all three columns.
814
816 &gamma12,
817 &sigma12,
818 &gamma23,
819 &sigma23,
820 a1, rs_A,
821 a2, rs_A,
822 a3, rs_A );
823 }
824 }
825 }
826
827 // Shutdown stage
828
829 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
830 {
831 g = nG - 1;
832 k = j;
833
834 n_left = 1;
835 if ( n_left == 1 )
836 {
837 g12 = buff_G + (g )*rs_G + (k )*cs_G;
838 a1 = buff_A + (g )*cs_A;
839 a2 = buff_A + (g + 1)*cs_A;
840
841 gamma12 = g12->real;
842 sigma12 = g12->imag;
843
844 is_ident12 = ( gamma12 == one && sigma12 == zero );
845
846 if ( !is_ident12 )
848 &gamma12,
849 &sigma12,
850 a1, rs_A,
851 a2, rs_A );
852 ++k;
853 --g;
854 }
855
856 nG_app = k_minus_1 - j;
857 n_iter = nG_app;
858
859 for ( i = 0; i < n_iter; ++i, ++k, --g )
860 {
861 g12 = buff_G + (g )*rs_G + (k )*cs_G;
862 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
863 a1 = buff_A + (g )*cs_A;
864 a2 = buff_A + (g + 1)*cs_A;
865 a3 = buff_A + (g + 2)*cs_A;
866
867 gamma12 = g12->real;
868 sigma12 = g12->imag;
869 gamma23 = g23->real;
870 sigma23 = g23->imag;
871
872 is_ident12 = ( gamma12 == one && sigma12 == zero );
873 is_ident23 = ( gamma23 == one && sigma23 == zero );
874
875 if ( !is_ident12 && is_ident23 )
876 {
877 // Apply only to columns 1 and 2.
878
880 &gamma12,
881 &sigma12,
882 a1, rs_A,
883 a2, rs_A );
884 }
885 else if ( is_ident12 && !is_ident23 )
886 {
887 // Apply only to columns 2 and 3.
888
890 &gamma23,
891 &sigma23,
892 a2, rs_A,
893 a3, rs_A );
894 }
895 else if ( !is_ident12 && !is_ident23 )
896 {
897 // Apply to all three columns.
898
900 &gamma12,
901 &sigma12,
902 &gamma23,
903 &sigma23,
904 a1, rs_A,
905 a2, rs_A,
906 a3, rs_A );
907 }
908 }
909 }
910
911 return FLA_SUCCESS;
912}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var9().

◆ FLA_Apply_G_rf_opd_var1()

FLA_Error FLA_Apply_G_rf_opd_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
169{
170 double one = bl1_d1();
171 double zero = bl1_d0();
172 int nG_app = n_A - 1;
173 int l, j;
174 double gamma;
175 double sigma;
176 double* a1;
177 double* a2;
178 dcomplex* g1;
179 dcomplex* g11;
180
181 g1 = buff_G;
182
183 for ( l = 0; l < k_G; ++l )
184 {
185 a1 = buff_A;
186 a2 = buff_A + cs_A;
187 g11 = g1;
188
189 for ( j = 0; j < nG_app; ++j )
190 {
191 gamma = g11->real;
192 sigma = g11->imag;
193
194 // Skip the current iteration if the rotation is identity.
195 if ( gamma != one || sigma != zero )
196 {
197 MAC_Apply_G_mx2_opd( m_A,
198 &gamma,
199 &sigma,
200 a1, rs_A,
201 a2, rs_A );
202 }
203
204 a1 += cs_A;
205 a2 += cs_A;
206 g11 += rs_G;
207 }
208
209 g1 += cs_G;
210 }
211
212 return FLA_SUCCESS;
213}

References bl1_d0(), bl1_d1(), i, and dcomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_opd_var9(), and FLA_Apply_G_rf_opt_var1().

◆ FLA_Apply_G_rf_opd_var2()

FLA_Error FLA_Apply_G_rf_opd_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
230{
231 double one = bl1_d1();
232 double zero = bl1_d0();
233 double gamma;
234 double sigma;
235 double* a1;
236 double* a2;
237 dcomplex* g11;
238 int j, g, k;
239 int nG, nG_app;
240 int k_minus_1;
241
242 k_minus_1 = k_G - 1;
243 nG = n_A - 1;
244
245 // Use the simple variant for nG < (k - 1) or k == 1.
246 if ( nG < k_minus_1 || k_G == 1 )
247 {
248 FLA_Apply_G_rf_opd_var1( k_G,
249 m_A,
250 n_A,
251 buff_G, rs_G, cs_G,
252 buff_A, rs_A, cs_A );
253 return FLA_SUCCESS;
254 }
255
256
257 // Start-up phase.
258
259 for ( j = 0; j < k_minus_1; ++j )
260 {
261 nG_app = j + 1;
262
263 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
264 {
265 g11 = buff_G + (g )*rs_G + (k )*cs_G;
266 a1 = buff_A + (g )*cs_A;
267 a2 = buff_A + (g + 1)*cs_A;
268
269 gamma = g11->real;
270 sigma = g11->imag;
271
272 // Skip the current iteration if the rotation is identity.
273 if ( gamma == one && sigma == zero ) continue;
274
275 MAC_Apply_G_mx2_opd( m_A,
276 &gamma,
277 &sigma,
278 a1, rs_A,
279 a2, rs_A );
280 }
281 }
282
283 // Pipeline stage
284
285 for ( j = k_minus_1; j < nG; ++j )
286 {
287 nG_app = k_G;
288
289 for ( k = 0, g = j; k < nG_app; ++k, --g )
290 {
291 g11 = buff_G + (g )*rs_G + (k )*cs_G;
292 a1 = buff_A + (g )*cs_A;
293 a2 = buff_A + (g + 1)*cs_A;
294
295 gamma = g11->real;
296 sigma = g11->imag;
297
298 // Skip the current iteration if the rotation is identity.
299 if ( gamma == one && sigma == zero ) continue;
300
301 MAC_Apply_G_mx2_opd( m_A,
302 &gamma,
303 &sigma,
304 a1, rs_A,
305 a2, rs_A );
306 }
307 }
308
309 // Shutdown stage
310
311 for ( j = nG - k_minus_1; j < nG; ++j )
312 {
313 nG_app = nG - j;
314
315 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
316 {
317 g11 = buff_G + (g )*rs_G + (k )*cs_G;
318 a1 = buff_A + (g )*cs_A;
319 a2 = buff_A + (g + 1)*cs_A;
320
321 gamma = g11->real;
322 sigma = g11->imag;
323
324 // Skip the current iteration if the rotation is identity.
325 if ( gamma == one && sigma == zero ) continue;
326
327 MAC_Apply_G_mx2_opd( m_A,
328 &gamma,
329 &sigma,
330 a1, rs_A,
331 a2, rs_A );
332 }
333 }
334
335 return FLA_SUCCESS;
336}
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:164

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), and i.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_opd_var3()

FLA_Error FLA_Apply_G_rf_opd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
570{
571 double one = bl1_d1();
572 double zero = bl1_d0();
573 double gamma23_k1;
574 double sigma23_k1;
575 double gamma34_k1;
576 double sigma34_k1;
577 double gamma12_k2;
578 double sigma12_k2;
579 double gamma23_k2;
580 double sigma23_k2;
581 double* a1;
582 double* a2;
583 double* a3;
584 double* a4;
589 int i, j, g, k;
590 int nG, nG_app;
591 int n_iter;
592 int n_left;
593 int k_minus_1;
594 int n_fuse;
595 int k_fuse;
598 int has_ident;
599
600 k_minus_1 = k_G - 1;
601 nG = n_A - 1;
602 n_fuse = 2;
603 k_fuse = 2;
604
605 // Use the simple variant for nG < 2(k - 1) or k == 1.
606 if ( nG < 2*k_minus_1 || k_G == 1 )
607 {
608 FLA_Apply_G_rf_opd_var1( k_G,
609 m_A,
610 n_A,
611 buff_G, rs_G, cs_G,
612 buff_A, rs_A, cs_A );
613 return FLA_SUCCESS;
614 }
615
616
617 // Start-up phase.
618
619 for ( j = -1; j < k_minus_1; j += n_fuse )
620 {
621 nG_app = j + 2;
622 n_iter = nG_app / k_fuse;
623 n_left = 1;
624
625 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
626 {
627 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
628 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
629 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
630 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
631 a1 = buff_A + (g - 1)*cs_A;
632 a2 = buff_A + (g )*cs_A;
633 a3 = buff_A + (g + 1)*cs_A;
634 a4 = buff_A + (g + 2)*cs_A;
635
636 gamma23_k1 = g23_k1->real;
637 sigma23_k1 = g23_k1->imag;
638 gamma34_k1 = g34_k1->real;
639 sigma34_k1 = g34_k1->imag;
640 gamma12_k2 = g12_k2->real;
641 sigma12_k2 = g12_k2->imag;
642 gamma23_k2 = g23_k2->real;
643 sigma23_k2 = g23_k2->imag;
644
651
652 if ( has_ident )
653 {
654 // Apply to pairs of columns as needed.
655
656 if ( !is_ident23_k1 )
658 &gamma23_k1,
659 &sigma23_k1,
660 a2, rs_A,
661 a3, rs_A );
662
663 if ( !is_ident34_k1 )
665 &gamma34_k1,
666 &sigma34_k1,
667 a3, rs_A,
668 a4, rs_A );
669
670 if ( !is_ident12_k2 )
672 &gamma12_k2,
673 &sigma12_k2,
674 a1, rs_A,
675 a2, rs_A );
676
677 if ( !is_ident23_k2 )
679 &gamma23_k2,
680 &sigma23_k2,
681 a2, rs_A,
682 a3, rs_A );
683 }
684 else
685 {
686 // Apply to all four columns.
687
689 &gamma23_k1,
690 &sigma23_k1,
691 &gamma34_k1,
692 &sigma34_k1,
693 &gamma12_k2,
694 &sigma12_k2,
695 &gamma23_k2,
696 &sigma23_k2,
697 a1, rs_A,
698 a2, rs_A,
699 a3, rs_A,
700 a4, rs_A );
701 }
702 }
703
704 if ( n_left == 1 )
705 {
706 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
707 a3 = buff_A + (g + 1)*cs_A;
708 a4 = buff_A + (g + 2)*cs_A;
709
710 gamma34_k1 = g34_k1->real;
711 sigma34_k1 = g34_k1->imag;
712
714
715 if ( !is_ident34_k1 )
717 &gamma34_k1,
718 &sigma34_k1,
719 a3, rs_A,
720 a4, rs_A );
721 }
722 }
723
724 // Pipeline stage
725
726 for ( ; j < nG - 1; j += n_fuse )
727 {
728 nG_app = k_G;
729 n_iter = nG_app / k_fuse;
730 n_left = nG_app % k_fuse;
731
732 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
733 {
734 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
735 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
736 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
737 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
738 a1 = buff_A + (g - 1)*cs_A;
739 a2 = buff_A + (g )*cs_A;
740 a3 = buff_A + (g + 1)*cs_A;
741 a4 = buff_A + (g + 2)*cs_A;
742
743 gamma23_k1 = g23_k1->real;
744 sigma23_k1 = g23_k1->imag;
745 gamma34_k1 = g34_k1->real;
746 sigma34_k1 = g34_k1->imag;
747 gamma12_k2 = g12_k2->real;
748 sigma12_k2 = g12_k2->imag;
749 gamma23_k2 = g23_k2->real;
750 sigma23_k2 = g23_k2->imag;
751
758
759 if ( has_ident )
760 {
761 // Apply to pairs of columns as needed.
762
763 if ( !is_ident23_k1 )
765 &gamma23_k1,
766 &sigma23_k1,
767 a2, rs_A,
768 a3, rs_A );
769
770 if ( !is_ident34_k1 )
772 &gamma34_k1,
773 &sigma34_k1,
774 a3, rs_A,
775 a4, rs_A );
776
777 if ( !is_ident12_k2 )
779 &gamma12_k2,
780 &sigma12_k2,
781 a1, rs_A,
782 a2, rs_A );
783
784 if ( !is_ident23_k2 )
786 &gamma23_k2,
787 &sigma23_k2,
788 a2, rs_A,
789 a3, rs_A );
790 }
791 else
792 {
793 // Apply to all four columns.
794
796 &gamma23_k1,
797 &sigma23_k1,
798 &gamma34_k1,
799 &sigma34_k1,
800 &gamma12_k2,
801 &sigma12_k2,
802 &gamma23_k2,
803 &sigma23_k2,
804 a1, rs_A,
805 a2, rs_A,
806 a3, rs_A,
807 a4, rs_A );
808 }
809 }
810
811 if ( n_left == 1 )
812 {
813 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
814 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
815 a2 = buff_A + (g )*cs_A;
816 a3 = buff_A + (g + 1)*cs_A;
817 a4 = buff_A + (g + 2)*cs_A;
818
819 gamma23_k1 = g23_k1->real;
820 sigma23_k1 = g23_k1->imag;
821 gamma34_k1 = g34_k1->real;
822 sigma34_k1 = g34_k1->imag;
823
826
828 {
830 &gamma23_k1,
831 &sigma23_k1,
832 a2, rs_A,
833 a3, rs_A );
834 }
835 else if ( is_ident23_k1 && !is_ident34_k1 )
836 {
838 &gamma34_k1,
839 &sigma34_k1,
840 a3, rs_A,
841 a4, rs_A );
842 }
843 else
844 {
846 &gamma23_k1,
847 &sigma23_k1,
848 &gamma34_k1,
849 &sigma34_k1,
850 a2, rs_A,
851 a3, rs_A,
852 a4, rs_A );
853 }
854 }
855 }
856
857 // Shutdown stage
858
859 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
860 {
861 g = nG - 1;
862 k = j;
863
864 //n_left = 1;
865 //if ( n_left == 1 )
866 {
867 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
868 a2 = buff_A + (g )*cs_A;
869 a3 = buff_A + (g + 1)*cs_A;
870
871 gamma23_k1 = g23_k1->real;
872 sigma23_k1 = g23_k1->imag;
873
875
876 if ( !is_ident23_k1 )
878 &gamma23_k1,
879 &sigma23_k1,
880 a2, rs_A,
881 a3, rs_A );
882 ++k;
883 --g;
884 }
885
886 nG_app = k_minus_1 - j;
887 n_iter = nG_app / k_fuse;
888 n_left = nG_app % k_fuse;
889
890 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
891 {
892 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
893 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
894 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
895 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
896 a1 = buff_A + (g - 1)*cs_A;
897 a2 = buff_A + (g )*cs_A;
898 a3 = buff_A + (g + 1)*cs_A;
899 a4 = buff_A + (g + 2)*cs_A;
900
901 gamma23_k1 = g23_k1->real;
902 sigma23_k1 = g23_k1->imag;
903 gamma34_k1 = g34_k1->real;
904 sigma34_k1 = g34_k1->imag;
905 gamma12_k2 = g12_k2->real;
906 sigma12_k2 = g12_k2->imag;
907 gamma23_k2 = g23_k2->real;
908 sigma23_k2 = g23_k2->imag;
909
916
917 if ( has_ident )
918 {
919 // Apply to pairs of columns as needed.
920
921 if ( !is_ident23_k1 )
923 &gamma23_k1,
924 &sigma23_k1,
925 a2, rs_A,
926 a3, rs_A );
927
928 if ( !is_ident34_k1 )
930 &gamma34_k1,
931 &sigma34_k1,
932 a3, rs_A,
933 a4, rs_A );
934
935 if ( !is_ident12_k2 )
937 &gamma12_k2,
938 &sigma12_k2,
939 a1, rs_A,
940 a2, rs_A );
941
942 if ( !is_ident23_k2 )
944 &gamma23_k2,
945 &sigma23_k2,
946 a2, rs_A,
947 a3, rs_A );
948 }
949 else
950 {
951 // Apply to all four columns.
952
954 &gamma23_k1,
955 &sigma23_k1,
956 &gamma34_k1,
957 &sigma34_k1,
958 &gamma12_k2,
959 &sigma12_k2,
960 &gamma23_k2,
961 &sigma23_k2,
962 a1, rs_A,
963 a2, rs_A,
964 a3, rs_A,
965 a4, rs_A );
966 }
967 }
968
969 if ( n_left == 1 )
970 {
971 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
972 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
973 a2 = buff_A + (g )*cs_A;
974 a3 = buff_A + (g + 1)*cs_A;
975 a4 = buff_A + (g + 2)*cs_A;
976
977 gamma23_k1 = g23_k1->real;
978 sigma23_k1 = g23_k1->imag;
979 gamma34_k1 = g34_k1->real;
980 sigma34_k1 = g34_k1->imag;
981
984
986 {
988 &gamma23_k1,
989 &sigma23_k1,
990 a2, rs_A,
991 a3, rs_A );
992 }
993 else if ( is_ident23_k1 && !is_ident34_k1 )
994 {
996 &gamma34_k1,
997 &sigma34_k1,
998 a3, rs_A,
999 a4, rs_A );
1000 }
1001 else
1002 {
1004 &gamma23_k1,
1005 &sigma23_k1,
1006 &gamma34_k1,
1007 &sigma34_k1,
1008 a2, rs_A,
1009 a3, rs_A,
1010 a4, rs_A );
1011 }
1012 }
1013 }
1014
1015 return FLA_SUCCESS;
1016}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_bld_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_opd_var4()

FLA_Error FLA_Apply_G_rf_opd_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var5()

FLA_Error FLA_Apply_G_rf_opd_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var6()

FLA_Error FLA_Apply_G_rf_opd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
398{
399 double one = bl1_d1();
400 double zero = bl1_d0();
401 double gamma12;
402 double sigma12;
403 double gamma23;
404 double sigma23;
405 double* a1;
406 double* a2;
407 double* a3;
408 dcomplex* g12;
409 dcomplex* g23;
410 int i, j, g, k;
411 int nG, nG_app;
412 int n_iter;
413 int n_left;
414 int k_minus_1;
415 int n_fuse;
417
418 k_minus_1 = k_G - 1;
419 nG = n_A - 1;
420 n_fuse = 2;
421
422 // Use the simple variant for nG < (k - 1) or k == 1.
423 if ( nG < k_minus_1 || k_G == 1 )
424 {
425 FLA_Apply_G_rf_opd_var1( k_G,
426 m_A,
427 n_A,
428 buff_G, rs_G, cs_G,
429 buff_A, rs_A, cs_A );
430 return FLA_SUCCESS;
431 }
432
433
434 // Start-up phase.
435
436 for ( j = 0; j < k_minus_1; ++j )
437 {
438 nG_app = j + 1;
439 n_iter = nG_app / n_fuse;
440 n_left = nG_app % n_fuse;
441
442 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
443 {
444 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
445 g23 = buff_G + (g )*rs_G + (k )*cs_G;
446 a1 = buff_A + (g - 1)*cs_A;
447 a2 = buff_A + (g )*cs_A;
448 a3 = buff_A + (g + 1)*cs_A;
449
450 gamma12 = g12->real;
451 sigma12 = g12->imag;
452 gamma23 = g23->real;
453 sigma23 = g23->imag;
454
455 is_ident12 = ( gamma12 == one && sigma12 == zero );
456 is_ident23 = ( gamma23 == one && sigma23 == zero );
457
458 if ( !is_ident12 && is_ident23 )
459 {
460 // Apply only to columns 1 and 2.
461
463 &gamma12,
464 &sigma12,
465 a1, rs_A,
466 a2, rs_A );
467 }
468 else if ( is_ident12 && !is_ident23 )
469 {
470 // Apply only to columns 2 and 3.
471
473 &gamma23,
474 &sigma23,
475 a2, rs_A,
476 a3, rs_A );
477 }
478 else if ( !is_ident12 && !is_ident23 )
479 {
480 // Apply to all three columns.
481
483 &gamma12,
484 &sigma12,
485 &gamma23,
486 &sigma23,
487 a1, rs_A,
488 a2, rs_A,
489 a3, rs_A );
490 }
491 }
492 //for ( k = 0; k < n_left; k += 1, g -= 1 )
493 if ( n_left == 1 )
494 {
495 g23 = buff_G + (g )*rs_G + (k )*cs_G;
496 a2 = buff_A + (g )*cs_A;
497 a3 = buff_A + (g + 1)*cs_A;
498
499 gamma23 = g23->real;
500 sigma23 = g23->imag;
501
502 is_ident23 = ( gamma23 == one && sigma23 == zero );
503
504 if ( !is_ident23 )
506 &gamma23,
507 &sigma23,
508 a2, rs_A,
509 a3, rs_A );
510 }
511 }
512
513 // Pipeline stage
514
515 for ( j = k_minus_1; j < nG; ++j )
516 {
517 nG_app = k_G;
518 n_iter = nG_app / n_fuse;
519 n_left = nG_app % n_fuse;
520
521 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
522 {
523 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
524 g23 = buff_G + (g )*rs_G + (k )*cs_G;
525 a1 = buff_A + (g - 1)*cs_A;
526 a2 = buff_A + (g )*cs_A;
527 a3 = buff_A + (g + 1)*cs_A;
528
529 gamma12 = g12->real;
530 sigma12 = g12->imag;
531 gamma23 = g23->real;
532 sigma23 = g23->imag;
533
534 is_ident12 = ( gamma12 == one && sigma12 == zero );
535 is_ident23 = ( gamma23 == one && sigma23 == zero );
536
537 if ( !is_ident12 && is_ident23 )
538 {
539 // Apply only to columns 1 and 2.
540
542 &gamma12,
543 &sigma12,
544 a1, rs_A,
545 a2, rs_A );
546 }
547 else if ( is_ident12 && !is_ident23 )
548 {
549 // Apply only to columns 2 and 3.
550
552 &gamma23,
553 &sigma23,
554 a2, rs_A,
555 a3, rs_A );
556 }
557 else if ( !is_ident12 && !is_ident23 )
558 {
559 // Apply to all three columns.
560
562 &gamma12,
563 &sigma12,
564 &gamma23,
565 &sigma23,
566 a1, rs_A,
567 a2, rs_A,
568 a3, rs_A );
569 }
570 }
571 //for ( k = 0; k < n_left; k += 1, g -= 1 )
572 if ( n_left == 1 )
573 {
574 g23 = buff_G + (g )*rs_G + (k )*cs_G;
575 a2 = buff_A + (g )*cs_A;
576 a3 = buff_A + (g + 1)*cs_A;
577
578 gamma23 = g23->real;
579 sigma23 = g23->imag;
580
581 is_ident23 = ( gamma23 == one && sigma23 == zero );
582
583 if ( !is_ident23 )
585 &gamma23,
586 &sigma23,
587 a2, rs_A,
588 a3, rs_A );
589 }
590 }
591
592 // Shutdown stage
593
594 for ( j = 1; j < k_G; ++j )
595 {
596 nG_app = k_G - j;
597 n_iter = nG_app / n_fuse;
598 n_left = nG_app % n_fuse;
599
600 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
601 {
602 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
603 g23 = buff_G + (g )*rs_G + (k )*cs_G;
604 a1 = buff_A + (g - 1)*cs_A;
605 a2 = buff_A + (g )*cs_A;
606 a3 = buff_A + (g + 1)*cs_A;
607
608 gamma12 = g12->real;
609 sigma12 = g12->imag;
610 gamma23 = g23->real;
611 sigma23 = g23->imag;
612
613 is_ident12 = ( gamma12 == one && sigma12 == zero );
614 is_ident23 = ( gamma23 == one && sigma23 == zero );
615
616 if ( !is_ident12 && is_ident23 )
617 {
618 // Apply only to columns 1 and 2.
619
621 &gamma12,
622 &sigma12,
623 a1, rs_A,
624 a2, rs_A );
625 }
626 else if ( is_ident12 && !is_ident23 )
627 {
628 // Apply only to columns 2 and 3.
629
631 &gamma23,
632 &sigma23,
633 a2, rs_A,
634 a3, rs_A );
635 }
636 else if ( !is_ident12 && !is_ident23 )
637 {
638 // Apply to all three columns.
639
641 &gamma12,
642 &sigma12,
643 &gamma23,
644 &sigma23,
645 a1, rs_A,
646 a2, rs_A,
647 a3, rs_A );
648 }
649 }
650 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
651 if ( n_left == 1 )
652 {
653 g23 = buff_G + (g )*rs_G + (k )*cs_G;
654 a2 = buff_A + (g )*cs_A;
655 a3 = buff_A + (g + 1)*cs_A;
656
657 gamma23 = g23->real;
658 sigma23 = g23->imag;
659
660 is_ident23 = ( gamma23 == one && sigma23 == zero );
661
662 if ( !is_ident23 )
664 &gamma23,
665 &sigma23,
666 a2, rs_A,
667 a3, rs_A );
668 }
669 }
670
671 return FLA_SUCCESS;
672}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opd_var7()

FLA_Error FLA_Apply_G_rf_opd_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var8()

FLA_Error FLA_Apply_G_rf_opd_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var9()

FLA_Error FLA_Apply_G_rf_opd_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
385{
386 double one = bl1_d1();
387 double zero = bl1_d0();
388 double gamma12;
389 double sigma12;
390 double gamma23;
391 double sigma23;
392 double* a1;
393 double* a2;
394 double* a3;
395 dcomplex* g12;
396 dcomplex* g23;
397 int i, j, g, k;
398 int nG, nG_app;
399 int n_iter;
400 int n_left;
401 int k_minus_1;
402 int n_fuse;
404
405 k_minus_1 = k_G - 1;
406 nG = n_A - 1;
407 n_fuse = 2;
408
409 // Use the simple variant for nG < 2(k - 1) or k == 1.
410 if ( nG < 2*k_minus_1 || k_G == 1 )
411 {
412 FLA_Apply_G_rf_opd_var1( k_G,
413 m_A,
414 n_A,
415 buff_G, rs_G, cs_G,
416 buff_A, rs_A, cs_A );
417 return FLA_SUCCESS;
418 }
419
420
421 // Start-up phase.
422
423 for ( j = -1; j < k_minus_1; j += n_fuse )
424 {
425 nG_app = j + 1;
426 n_iter = nG_app;
427 n_left = 1;
428
429 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
430 {
431 g12 = buff_G + (g )*rs_G + (k )*cs_G;
432 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
433 a1 = buff_A + (g )*cs_A;
434 a2 = buff_A + (g + 1)*cs_A;
435 a3 = buff_A + (g + 2)*cs_A;
436
437 gamma12 = g12->real;
438 sigma12 = g12->imag;
439 gamma23 = g23->real;
440 sigma23 = g23->imag;
441
442 is_ident12 = ( gamma12 == one && sigma12 == zero );
443 is_ident23 = ( gamma23 == one && sigma23 == zero );
444
445 if ( !is_ident12 && is_ident23 )
446 {
447 // Apply only to columns 1 and 2.
448
450 &gamma12,
451 &sigma12,
452 a1, rs_A,
453 a2, rs_A );
454 }
455 else if ( is_ident12 && !is_ident23 )
456 {
457 // Apply only to columns 2 and 3.
458
460 &gamma23,
461 &sigma23,
462 a2, rs_A,
463 a3, rs_A );
464 }
465 else if ( !is_ident12 && !is_ident23 )
466 {
467 // Apply to all three columns.
468
470 &gamma12,
471 &sigma12,
472 &gamma23,
473 &sigma23,
474 a1, rs_A,
475 a2, rs_A,
476 a3, rs_A );
477 }
478 }
479
480 if ( n_left == 1 )
481 {
482 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
483 a2 = buff_A + (g + 1)*cs_A;
484 a3 = buff_A + (g + 2)*cs_A;
485
486 gamma23 = g23->real;
487 sigma23 = g23->imag;
488
489 is_ident23 = ( gamma23 == one && sigma23 == zero );
490
491 if ( !is_ident23 )
493 &gamma23,
494 &sigma23,
495 a2, rs_A,
496 a3, rs_A );
497 }
498 }
499
500 // Pipeline stage
501
502 for ( ; j < nG - 1; j += n_fuse )
503 {
504 nG_app = k_G;
505 n_iter = nG_app;
506 n_left = 0;
507
508 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
509 {
510 g12 = buff_G + (g )*rs_G + (k )*cs_G;
511 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
512 a1 = buff_A + (g )*cs_A;
513 a2 = buff_A + (g + 1)*cs_A;
514 a3 = buff_A + (g + 2)*cs_A;
515
516 gamma12 = g12->real;
517 sigma12 = g12->imag;
518 gamma23 = g23->real;
519 sigma23 = g23->imag;
520
521 is_ident12 = ( gamma12 == one && sigma12 == zero );
522 is_ident23 = ( gamma23 == one && sigma23 == zero );
523
524 if ( !is_ident12 && is_ident23 )
525 {
526 // Apply only to columns 1 and 2.
527
529 &gamma12,
530 &sigma12,
531 a1, rs_A,
532 a2, rs_A );
533 }
534 else if ( is_ident12 && !is_ident23 )
535 {
536 // Apply only to columns 2 and 3.
537
539 &gamma23,
540 &sigma23,
541 a2, rs_A,
542 a3, rs_A );
543 }
544 else if ( !is_ident12 && !is_ident23 )
545 {
546 // Apply to all three columns.
547
549 &gamma12,
550 &sigma12,
551 &gamma23,
552 &sigma23,
553 a1, rs_A,
554 a2, rs_A,
555 a3, rs_A );
556 }
557 }
558 }
559
560 // Shutdown stage
561
562 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
563 {
564 g = nG - 1;
565 k = j;
566
567 n_left = 1;
568 if ( n_left == 1 )
569 {
570 g12 = buff_G + (g )*rs_G + (k )*cs_G;
571 a1 = buff_A + (g )*cs_A;
572 a2 = buff_A + (g + 1)*cs_A;
573
574 gamma12 = g12->real;
575 sigma12 = g12->imag;
576
577 is_ident12 = ( gamma12 == one && sigma12 == zero );
578
579 if ( !is_ident12 )
581 &gamma12,
582 &sigma12,
583 a1, rs_A,
584 a2, rs_A );
585 ++k;
586 --g;
587 }
588
589 nG_app = k_minus_1 - j;
590 n_iter = nG_app;
591
592 for ( i = 0; i < n_iter; ++i, ++k, --g )
593 {
594 g12 = buff_G + (g )*rs_G + (k )*cs_G;
595 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
596 a1 = buff_A + (g )*cs_A;
597 a2 = buff_A + (g + 1)*cs_A;
598 a3 = buff_A + (g + 2)*cs_A;
599
600 gamma12 = g12->real;
601 sigma12 = g12->imag;
602 gamma23 = g23->real;
603 sigma23 = g23->imag;
604
605 is_ident12 = ( gamma12 == one && sigma12 == zero );
606 is_ident23 = ( gamma23 == one && sigma23 == zero );
607
608 if ( !is_ident12 && is_ident23 )
609 {
610 // Apply only to columns 1 and 2.
611
613 &gamma12,
614 &sigma12,
615 a1, rs_A,
616 a2, rs_A );
617 }
618 else if ( is_ident12 && !is_ident23 )
619 {
620 // Apply only to columns 2 and 3.
621
623 &gamma23,
624 &sigma23,
625 a2, rs_A,
626 a3, rs_A );
627 }
628 else if ( !is_ident12 && !is_ident23 )
629 {
630 // Apply to all three columns.
631
633 &gamma12,
634 &sigma12,
635 &gamma23,
636 &sigma23,
637 a1, rs_A,
638 a2, rs_A,
639 a3, rs_A );
640 }
641 }
642 }
643
644 return FLA_SUCCESS;
645}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var9().

◆ FLA_Apply_G_rf_ops_var1()

FLA_Error FLA_Apply_G_rf_ops_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 int nG_app = n_A - 1;
122 int l, j;
123 float gamma;
124 float sigma;
125 float* a1;
126 float* a2;
127 scomplex* g1;
128 scomplex* g11;
129
130 g1 = buff_G;
131
132 for ( l = 0; l < k_G; ++l )
133 {
134 a1 = buff_A;
135 a2 = buff_A + cs_A;
136 g11 = g1;
137
138 for ( j = 0; j < nG_app; ++j )
139 {
140 gamma = g11->real;
141 sigma = g11->imag;
142
143 // Skip the current iteration if the rotation is identity.
144 if ( gamma != one || sigma != zero )
145 {
146 MAC_Apply_G_mx2_ops( m_A,
147 &gamma,
148 &sigma,
149 a1, rs_A,
150 a2, rs_A );
151 }
152
153 a1 += cs_A;
154 a2 += cs_A;
155 g11 += rs_G;
156 }
157
158 g1 += cs_G;
159 }
160
161 return FLA_SUCCESS;
162}

References bl1_s0(), bl1_s1(), i, and scomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_ops_var9(), and FLA_Apply_G_rf_opt_var1().

◆ FLA_Apply_G_rf_ops_var2()

FLA_Error FLA_Apply_G_rf_ops_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
117{
118 float one = bl1_s1();
119 float zero = bl1_s0();
120 float gamma;
121 float sigma;
122 float* a1;
123 float* a2;
124 scomplex* g11;
125 int j, g, k;
126 int nG, nG_app;
127 int k_minus_1;
128
129 k_minus_1 = k_G - 1;
130 nG = n_A - 1;
131
132 // Use the simple variant for nG < (k - 1) or k == 1.
133 if ( nG < k_minus_1 || k_G == 1 )
134 {
135 FLA_Apply_G_rf_ops_var1( k_G,
136 m_A,
137 n_A,
138 buff_G, rs_G, cs_G,
139 buff_A, rs_A, cs_A );
140 return FLA_SUCCESS;
141 }
142
143
144 // Start-up phase.
145
146 for ( j = 0; j < k_minus_1; ++j )
147 {
148 nG_app = j + 1;
149
150 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
151 {
152 g11 = buff_G + (g )*rs_G + (k )*cs_G;
153 a1 = buff_A + (g )*cs_A;
154 a2 = buff_A + (g + 1)*cs_A;
155
156 gamma = g11->real;
157 sigma = g11->imag;
158
159 // Skip the current iteration if the rotation is identity.
160 if ( gamma == one && sigma == zero ) continue;
161
162 MAC_Apply_G_mx2_ops( m_A,
163 &gamma,
164 &sigma,
165 a1, rs_A,
166 a2, rs_A );
167 }
168 }
169
170 // Pipeline stage
171
172 for ( j = k_minus_1; j < nG; ++j )
173 {
174 nG_app = k_G;
175
176 for ( k = 0, g = j; k < nG_app; ++k, --g )
177 {
178 g11 = buff_G + (g )*rs_G + (k )*cs_G;
179 a1 = buff_A + (g )*cs_A;
180 a2 = buff_A + (g + 1)*cs_A;
181
182 gamma = g11->real;
183 sigma = g11->imag;
184
185 // Skip the current iteration if the rotation is identity.
186 if ( gamma == one && sigma == zero ) continue;
187
188 MAC_Apply_G_mx2_ops( m_A,
189 &gamma,
190 &sigma,
191 a1, rs_A,
192 a2, rs_A );
193 }
194 }
195
196 // Shutdown stage
197
198 for ( j = nG - k_minus_1; j < nG; ++j )
199 {
200 nG_app = nG - j;
201
202 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
203 {
204 g11 = buff_G + (g )*rs_G + (k )*cs_G;
205 a1 = buff_A + (g )*cs_A;
206 a2 = buff_A + (g + 1)*cs_A;
207
208 gamma = g11->real;
209 sigma = g11->imag;
210
211 // Skip the current iteration if the rotation is identity.
212 if ( gamma == one && sigma == zero ) continue;
213
214 MAC_Apply_G_mx2_ops( m_A,
215 &gamma,
216 &sigma,
217 a1, rs_A,
218 a2, rs_A );
219 }
220 }
221
222 return FLA_SUCCESS;
223}
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), and i.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_ops_var3()

FLA_Error FLA_Apply_G_rf_ops_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
117{
118 float one = bl1_s1();
119 float zero = bl1_s0();
120 float gamma23_k1;
121 float sigma23_k1;
122 float gamma34_k1;
123 float sigma34_k1;
124 float gamma12_k2;
125 float sigma12_k2;
126 float gamma23_k2;
127 float sigma23_k2;
128 float* a1;
129 float* a2;
130 float* a3;
131 float* a4;
136 int i, j, g, k;
137 int nG, nG_app;
138 int n_iter;
139 int n_left;
140 int k_minus_1;
141 int n_fuse;
142 int k_fuse;
145 int has_ident;
146
147 k_minus_1 = k_G - 1;
148 nG = n_A - 1;
149 n_fuse = 2;
150 k_fuse = 2;
151
152 // Use the simple variant for nG < 2(k - 1) or k == 1.
153 if ( nG < 2*k_minus_1 || k_G == 1 )
154 {
155 FLA_Apply_G_rf_ops_var1( k_G,
156 m_A,
157 n_A,
158 buff_G, rs_G, cs_G,
159 buff_A, rs_A, cs_A );
160 return FLA_SUCCESS;
161 }
162
163
164 // Start-up phase.
165
166 for ( j = -1; j < k_minus_1; j += n_fuse )
167 {
168 nG_app = j + 2;
169 n_iter = nG_app / k_fuse;
170 n_left = 1;
171
172 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
173 {
174 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
175 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
176 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
177 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
178 a1 = buff_A + (g - 1)*cs_A;
179 a2 = buff_A + (g )*cs_A;
180 a3 = buff_A + (g + 1)*cs_A;
181 a4 = buff_A + (g + 2)*cs_A;
182
183 gamma23_k1 = g23_k1->real;
184 sigma23_k1 = g23_k1->imag;
185 gamma34_k1 = g34_k1->real;
186 sigma34_k1 = g34_k1->imag;
187 gamma12_k2 = g12_k2->real;
188 sigma12_k2 = g12_k2->imag;
189 gamma23_k2 = g23_k2->real;
190 sigma23_k2 = g23_k2->imag;
191
198
199 if ( has_ident )
200 {
201 // Apply to pairs of columns as needed.
202
203 if ( !is_ident23_k1 )
205 &gamma23_k1,
206 &sigma23_k1,
207 a2, rs_A,
208 a3, rs_A );
209
210 if ( !is_ident34_k1 )
212 &gamma34_k1,
213 &sigma34_k1,
214 a3, rs_A,
215 a4, rs_A );
216
217 if ( !is_ident12_k2 )
219 &gamma12_k2,
220 &sigma12_k2,
221 a1, rs_A,
222 a2, rs_A );
223
224 if ( !is_ident23_k2 )
226 &gamma23_k2,
227 &sigma23_k2,
228 a2, rs_A,
229 a3, rs_A );
230 }
231 else
232 {
233 // Apply to all four columns.
234
236 &gamma23_k1,
237 &sigma23_k1,
238 &gamma34_k1,
239 &sigma34_k1,
240 &gamma12_k2,
241 &sigma12_k2,
242 &gamma23_k2,
243 &sigma23_k2,
244 a1, rs_A,
245 a2, rs_A,
246 a3, rs_A,
247 a4, rs_A );
248 }
249 }
250
251 if ( n_left == 1 )
252 {
253 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
254 a3 = buff_A + (g + 1)*cs_A;
255 a4 = buff_A + (g + 2)*cs_A;
256
257 gamma34_k1 = g34_k1->real;
258 sigma34_k1 = g34_k1->imag;
259
261
262 if ( !is_ident34_k1 )
264 &gamma34_k1,
265 &sigma34_k1,
266 a3, rs_A,
267 a4, rs_A );
268 }
269 }
270
271 // Pipeline stage
272
273 for ( ; j < nG - 1; j += n_fuse )
274 {
275 nG_app = k_G;
276 n_iter = nG_app / k_fuse;
277 n_left = nG_app % k_fuse;
278
279 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
280 {
281 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
282 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
283 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
284 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
285 a1 = buff_A + (g - 1)*cs_A;
286 a2 = buff_A + (g )*cs_A;
287 a3 = buff_A + (g + 1)*cs_A;
288 a4 = buff_A + (g + 2)*cs_A;
289
290 gamma23_k1 = g23_k1->real;
291 sigma23_k1 = g23_k1->imag;
292 gamma34_k1 = g34_k1->real;
293 sigma34_k1 = g34_k1->imag;
294 gamma12_k2 = g12_k2->real;
295 sigma12_k2 = g12_k2->imag;
296 gamma23_k2 = g23_k2->real;
297 sigma23_k2 = g23_k2->imag;
298
305
306 if ( has_ident )
307 {
308 // Apply to pairs of columns as needed.
309
310 if ( !is_ident23_k1 )
312 &gamma23_k1,
313 &sigma23_k1,
314 a2, rs_A,
315 a3, rs_A );
316
317 if ( !is_ident34_k1 )
319 &gamma34_k1,
320 &sigma34_k1,
321 a3, rs_A,
322 a4, rs_A );
323
324 if ( !is_ident12_k2 )
326 &gamma12_k2,
327 &sigma12_k2,
328 a1, rs_A,
329 a2, rs_A );
330
331 if ( !is_ident23_k2 )
333 &gamma23_k2,
334 &sigma23_k2,
335 a2, rs_A,
336 a3, rs_A );
337 }
338 else
339 {
340 // Apply to all four columns.
341
343 &gamma23_k1,
344 &sigma23_k1,
345 &gamma34_k1,
346 &sigma34_k1,
347 &gamma12_k2,
348 &sigma12_k2,
349 &gamma23_k2,
350 &sigma23_k2,
351 a1, rs_A,
352 a2, rs_A,
353 a3, rs_A,
354 a4, rs_A );
355 }
356 }
357
358 if ( n_left == 1 )
359 {
360 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
361 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362 a2 = buff_A + (g )*cs_A;
363 a3 = buff_A + (g + 1)*cs_A;
364 a4 = buff_A + (g + 2)*cs_A;
365
366 gamma23_k1 = g23_k1->real;
367 sigma23_k1 = g23_k1->imag;
368 gamma34_k1 = g34_k1->real;
369 sigma34_k1 = g34_k1->imag;
370
373
375 {
377 &gamma23_k1,
378 &sigma23_k1,
379 a2, rs_A,
380 a3, rs_A );
381 }
382 else if ( is_ident23_k1 && !is_ident34_k1 )
383 {
385 &gamma34_k1,
386 &sigma34_k1,
387 a3, rs_A,
388 a4, rs_A );
389 }
390 else
391 {
393 &gamma23_k1,
394 &sigma23_k1,
395 &gamma34_k1,
396 &sigma34_k1,
397 a2, rs_A,
398 a3, rs_A,
399 a4, rs_A );
400 }
401 }
402 }
403
404 // Shutdown stage
405
406 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
407 {
408 g = nG - 1;
409 k = j;
410
411 //n_left = 1;
412 //if ( n_left == 1 )
413 {
414 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
415 a2 = buff_A + (g )*cs_A;
416 a3 = buff_A + (g + 1)*cs_A;
417
418 gamma23_k1 = g23_k1->real;
419 sigma23_k1 = g23_k1->imag;
420
422
423 if ( !is_ident23_k1 )
425 &gamma23_k1,
426 &sigma23_k1,
427 a2, rs_A,
428 a3, rs_A );
429 ++k;
430 --g;
431 }
432
433 nG_app = k_minus_1 - j;
434 n_iter = nG_app / k_fuse;
435 n_left = nG_app % k_fuse;
436
437 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
438 {
439 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
440 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
441 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
442 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
443 a1 = buff_A + (g - 1)*cs_A;
444 a2 = buff_A + (g )*cs_A;
445 a3 = buff_A + (g + 1)*cs_A;
446 a4 = buff_A + (g + 2)*cs_A;
447
448 gamma23_k1 = g23_k1->real;
449 sigma23_k1 = g23_k1->imag;
450 gamma34_k1 = g34_k1->real;
451 sigma34_k1 = g34_k1->imag;
452 gamma12_k2 = g12_k2->real;
453 sigma12_k2 = g12_k2->imag;
454 gamma23_k2 = g23_k2->real;
455 sigma23_k2 = g23_k2->imag;
456
463
464 if ( has_ident )
465 {
466 // Apply to pairs of columns as needed.
467
468 if ( !is_ident23_k1 )
470 &gamma23_k1,
471 &sigma23_k1,
472 a2, rs_A,
473 a3, rs_A );
474
475 if ( !is_ident34_k1 )
477 &gamma34_k1,
478 &sigma34_k1,
479 a3, rs_A,
480 a4, rs_A );
481
482 if ( !is_ident12_k2 )
484 &gamma12_k2,
485 &sigma12_k2,
486 a1, rs_A,
487 a2, rs_A );
488
489 if ( !is_ident23_k2 )
491 &gamma23_k2,
492 &sigma23_k2,
493 a2, rs_A,
494 a3, rs_A );
495 }
496 else
497 {
498 // Apply to all four columns.
499
501 &gamma23_k1,
502 &sigma23_k1,
503 &gamma34_k1,
504 &sigma34_k1,
505 &gamma12_k2,
506 &sigma12_k2,
507 &gamma23_k2,
508 &sigma23_k2,
509 a1, rs_A,
510 a2, rs_A,
511 a3, rs_A,
512 a4, rs_A );
513 }
514 }
515
516 if ( n_left == 1 )
517 {
518 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
519 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
520 a2 = buff_A + (g )*cs_A;
521 a3 = buff_A + (g + 1)*cs_A;
522 a4 = buff_A + (g + 2)*cs_A;
523
524 gamma23_k1 = g23_k1->real;
525 sigma23_k1 = g23_k1->imag;
526 gamma34_k1 = g34_k1->real;
527 sigma34_k1 = g34_k1->imag;
528
531
533 {
535 &gamma23_k1,
536 &sigma23_k1,
537 a2, rs_A,
538 a3, rs_A );
539 }
540 else if ( is_ident23_k1 && !is_ident34_k1 )
541 {
543 &gamma34_k1,
544 &sigma34_k1,
545 a3, rs_A,
546 a4, rs_A );
547 }
548 else
549 {
551 &gamma23_k1,
552 &sigma23_k1,
553 &gamma34_k1,
554 &sigma34_k1,
555 a2, rs_A,
556 a3, rs_A,
557 a4, rs_A );
558 }
559 }
560 }
561
562 return FLA_SUCCESS;
563}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_bls_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_ops_var4()

FLA_Error FLA_Apply_G_rf_ops_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var5()

FLA_Error FLA_Apply_G_rf_ops_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var6()

FLA_Error FLA_Apply_G_rf_ops_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
117{
118 float one = bl1_s1();
119 float zero = bl1_s0();
120 float gamma12;
121 float sigma12;
122 float gamma23;
123 float sigma23;
124 float* a1;
125 float* a2;
126 float* a3;
127 scomplex* g12;
128 scomplex* g23;
129 int i, j, g, k;
130 int nG, nG_app;
131 int n_iter;
132 int n_left;
133 int k_minus_1;
134 int n_fuse;
136
137 k_minus_1 = k_G - 1;
138 nG = n_A - 1;
139 n_fuse = 2;
140
141 // Use the simple variant for nG < (k - 1) or k == 1.
142 if ( nG < k_minus_1 || k_G == 1 )
143 {
145 m_A,
146 n_A,
147 buff_G, rs_G, cs_G,
148 buff_A, rs_A, cs_A );
149 return FLA_SUCCESS;
150 }
151
152
153 // Start-up phase.
154
155 for ( j = 0; j < k_minus_1; ++j )
156 {
157 nG_app = j + 1;
158 n_iter = nG_app / n_fuse;
159 n_left = nG_app % n_fuse;
160
161 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
162 {
163 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
164 g23 = buff_G + (g )*rs_G + (k )*cs_G;
165 a1 = buff_A + (g - 1)*cs_A;
166 a2 = buff_A + (g )*cs_A;
167 a3 = buff_A + (g + 1)*cs_A;
168
169 gamma12 = g12->real;
170 sigma12 = g12->imag;
171 gamma23 = g23->real;
172 sigma23 = g23->imag;
173
174 is_ident12 = ( gamma12 == one && sigma12 == zero );
175 is_ident23 = ( gamma23 == one && sigma23 == zero );
176
177 if ( !is_ident12 && is_ident23 )
178 {
179 // Apply only to columns 1 and 2.
180
182 &gamma12,
183 &sigma12,
184 a1, rs_A,
185 a2, rs_A );
186 }
187 else if ( is_ident12 && !is_ident23 )
188 {
189 // Apply only to columns 2 and 3.
190
192 &gamma23,
193 &sigma23,
194 a2, rs_A,
195 a3, rs_A );
196 }
197 else if ( !is_ident12 && !is_ident23 )
198 {
199 // Apply to all three columns.
200
202 &gamma12,
203 &sigma12,
204 &gamma23,
205 &sigma23,
206 a1, rs_A,
207 a2, rs_A,
208 a3, rs_A );
209 }
210 }
211 //for ( k = 0; k < n_left; k += 1, g -= 1 )
212 if ( n_left == 1 )
213 {
214 g23 = buff_G + (g )*rs_G + (k )*cs_G;
215 a2 = buff_A + (g )*cs_A;
216 a3 = buff_A + (g + 1)*cs_A;
217
218 gamma23 = g23->real;
219 sigma23 = g23->imag;
220
221 is_ident23 = ( gamma23 == one && sigma23 == zero );
222
223 if ( !is_ident23 )
225 &gamma23,
226 &sigma23,
227 a2, rs_A,
228 a3, rs_A );
229 }
230 }
231
232 // Pipeline stage
233
234 for ( j = k_minus_1; j < nG; ++j )
235 {
236 nG_app = k_G;
237 n_iter = nG_app / n_fuse;
238 n_left = nG_app % n_fuse;
239
240 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
241 {
242 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
243 g23 = buff_G + (g )*rs_G + (k )*cs_G;
244 a1 = buff_A + (g - 1)*cs_A;
245 a2 = buff_A + (g )*cs_A;
246 a3 = buff_A + (g + 1)*cs_A;
247
248 gamma12 = g12->real;
249 sigma12 = g12->imag;
250 gamma23 = g23->real;
251 sigma23 = g23->imag;
252
253 is_ident12 = ( gamma12 == one && sigma12 == zero );
254 is_ident23 = ( gamma23 == one && sigma23 == zero );
255
256 if ( !is_ident12 && is_ident23 )
257 {
258 // Apply only to columns 1 and 2.
259
261 &gamma12,
262 &sigma12,
263 a1, rs_A,
264 a2, rs_A );
265 }
266 else if ( is_ident12 && !is_ident23 )
267 {
268 // Apply only to columns 2 and 3.
269
271 &gamma23,
272 &sigma23,
273 a2, rs_A,
274 a3, rs_A );
275 }
276 else if ( !is_ident12 && !is_ident23 )
277 {
278 // Apply to all three columns.
279
281 &gamma12,
282 &sigma12,
283 &gamma23,
284 &sigma23,
285 a1, rs_A,
286 a2, rs_A,
287 a3, rs_A );
288 }
289 }
290 //for ( k = 0; k < n_left; k += 1, g -= 1 )
291 if ( n_left == 1 )
292 {
293 g23 = buff_G + (g )*rs_G + (k )*cs_G;
294 a2 = buff_A + (g )*cs_A;
295 a3 = buff_A + (g + 1)*cs_A;
296
297 gamma23 = g23->real;
298 sigma23 = g23->imag;
299
300 is_ident23 = ( gamma23 == one && sigma23 == zero );
301
302 if ( !is_ident23 )
304 &gamma23,
305 &sigma23,
306 a2, rs_A,
307 a3, rs_A );
308 }
309 }
310
311 // Shutdown stage
312
313 for ( j = 1; j < k_G; ++j )
314 {
315 nG_app = k_G - j;
316 n_iter = nG_app / n_fuse;
317 n_left = nG_app % n_fuse;
318
319 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
320 {
321 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
322 g23 = buff_G + (g )*rs_G + (k )*cs_G;
323 a1 = buff_A + (g - 1)*cs_A;
324 a2 = buff_A + (g )*cs_A;
325 a3 = buff_A + (g + 1)*cs_A;
326
327 gamma12 = g12->real;
328 sigma12 = g12->imag;
329 gamma23 = g23->real;
330 sigma23 = g23->imag;
331
332 is_ident12 = ( gamma12 == one && sigma12 == zero );
333 is_ident23 = ( gamma23 == one && sigma23 == zero );
334
335 if ( !is_ident12 && is_ident23 )
336 {
337 // Apply only to columns 1 and 2.
338
340 &gamma12,
341 &sigma12,
342 a1, rs_A,
343 a2, rs_A );
344 }
345 else if ( is_ident12 && !is_ident23 )
346 {
347 // Apply only to columns 2 and 3.
348
350 &gamma23,
351 &sigma23,
352 a2, rs_A,
353 a3, rs_A );
354 }
355 else if ( !is_ident12 && !is_ident23 )
356 {
357 // Apply to all three columns.
358
360 &gamma12,
361 &sigma12,
362 &gamma23,
363 &sigma23,
364 a1, rs_A,
365 a2, rs_A,
366 a3, rs_A );
367 }
368 }
369 //for ( k = 0; k < n_left; k += 1, g -= 1 )
370 if ( n_left == 1 )
371 {
372 g23 = buff_G + (g )*rs_G + (k )*cs_G;
373 a2 = buff_A + (g )*cs_A;
374 a3 = buff_A + (g + 1)*cs_A;
375
376 gamma23 = g23->real;
377 sigma23 = g23->imag;
378
379 is_ident23 = ( gamma23 == one && sigma23 == zero );
380
381 if ( !is_ident23 )
383 &gamma23,
384 &sigma23,
385 a2, rs_A,
386 a3, rs_A );
387 }
388 }
389
390 return FLA_SUCCESS;
391}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_ops_var7()

FLA_Error FLA_Apply_G_rf_ops_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var8()

FLA_Error FLA_Apply_G_rf_ops_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var9()

FLA_Error FLA_Apply_G_rf_ops_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 float gamma12;
122 float sigma12;
123 float gamma23;
124 float sigma23;
125 float* a1;
126 float* a2;
127 float* a3;
128 scomplex* g12;
129 scomplex* g23;
130 int i, j, g, k;
131 int nG, nG_app;
132 int n_iter;
133 int n_left;
134 int k_minus_1;
135 int n_fuse;
137
138 k_minus_1 = k_G - 1;
139 nG = n_A - 1;
140 n_fuse = 2;
141
142 // Use the simple variant for nG < 2(k - 1) or k == 1.
143 if ( nG < 2*k_minus_1 || k_G == 1 )
144 {
146 m_A,
147 n_A,
148 buff_G, rs_G, cs_G,
149 buff_A, rs_A, cs_A );
150 return FLA_SUCCESS;
151 }
152
153
154 // Start-up phase.
155
156 for ( j = -1; j < k_minus_1; j += n_fuse )
157 {
158 nG_app = j + 1;
159 n_iter = nG_app;
160 n_left = 1;
161
162 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
163 {
164 g12 = buff_G + (g )*rs_G + (k )*cs_G;
165 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
166 a1 = buff_A + (g )*cs_A;
167 a2 = buff_A + (g + 1)*cs_A;
168 a3 = buff_A + (g + 2)*cs_A;
169
170 gamma12 = g12->real;
171 sigma12 = g12->imag;
172 gamma23 = g23->real;
173 sigma23 = g23->imag;
174
175 is_ident12 = ( gamma12 == one && sigma12 == zero );
176 is_ident23 = ( gamma23 == one && sigma23 == zero );
177
178 if ( !is_ident12 && is_ident23 )
179 {
180 // Apply only to columns 1 and 2.
181
183 &gamma12,
184 &sigma12,
185 a1, rs_A,
186 a2, rs_A );
187 }
188 else if ( is_ident12 && !is_ident23 )
189 {
190 // Apply only to columns 2 and 3.
191
193 &gamma23,
194 &sigma23,
195 a2, rs_A,
196 a3, rs_A );
197 }
198 else if ( !is_ident12 && !is_ident23 )
199 {
200 // Apply to all three columns.
201
203 &gamma12,
204 &sigma12,
205 &gamma23,
206 &sigma23,
207 a1, rs_A,
208 a2, rs_A,
209 a3, rs_A );
210 }
211 }
212
213 if ( n_left == 1 )
214 {
215 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
216 a2 = buff_A + (g + 1)*cs_A;
217 a3 = buff_A + (g + 2)*cs_A;
218
219 gamma23 = g23->real;
220 sigma23 = g23->imag;
221
222 is_ident23 = ( gamma23 == one && sigma23 == zero );
223
224 if ( !is_ident23 )
226 &gamma23,
227 &sigma23,
228 a2, rs_A,
229 a3, rs_A );
230 }
231 }
232
233 // Pipeline stage
234
235 for ( ; j < nG - 1; j += n_fuse )
236 {
237 nG_app = k_G;
238 n_iter = nG_app;
239 n_left = 0;
240
241 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
242 {
243 g12 = buff_G + (g )*rs_G + (k )*cs_G;
244 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
245 a1 = buff_A + (g )*cs_A;
246 a2 = buff_A + (g + 1)*cs_A;
247 a3 = buff_A + (g + 2)*cs_A;
248
249 gamma12 = g12->real;
250 sigma12 = g12->imag;
251 gamma23 = g23->real;
252 sigma23 = g23->imag;
253
254 is_ident12 = ( gamma12 == one && sigma12 == zero );
255 is_ident23 = ( gamma23 == one && sigma23 == zero );
256
257 if ( !is_ident12 && is_ident23 )
258 {
259 // Apply only to columns 1 and 2.
260
262 &gamma12,
263 &sigma12,
264 a1, rs_A,
265 a2, rs_A );
266 }
267 else if ( is_ident12 && !is_ident23 )
268 {
269 // Apply only to columns 2 and 3.
270
272 &gamma23,
273 &sigma23,
274 a2, rs_A,
275 a3, rs_A );
276 }
277 else if ( !is_ident12 && !is_ident23 )
278 {
279 // Apply to all three columns.
280
282 &gamma12,
283 &sigma12,
284 &gamma23,
285 &sigma23,
286 a1, rs_A,
287 a2, rs_A,
288 a3, rs_A );
289 }
290 }
291 }
292
293 // Shutdown stage
294
295 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
296 {
297 g = nG - 1;
298 k = j;
299
300 n_left = 1;
301 if ( n_left == 1 )
302 {
303 g12 = buff_G + (g )*rs_G + (k )*cs_G;
304 a1 = buff_A + (g )*cs_A;
305 a2 = buff_A + (g + 1)*cs_A;
306
307 gamma12 = g12->real;
308 sigma12 = g12->imag;
309
310 is_ident12 = ( gamma12 == one && sigma12 == zero );
311
312 if ( !is_ident12 )
314 &gamma12,
315 &sigma12,
316 a1, rs_A,
317 a2, rs_A );
318 ++k;
319 --g;
320 }
321
322 nG_app = k_minus_1 - j;
323 n_iter = nG_app;
324
325 for ( i = 0; i < n_iter; ++i, ++k, --g )
326 {
327 g12 = buff_G + (g )*rs_G + (k )*cs_G;
328 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
329 a1 = buff_A + (g )*cs_A;
330 a2 = buff_A + (g + 1)*cs_A;
331 a3 = buff_A + (g + 2)*cs_A;
332
333 gamma12 = g12->real;
334 sigma12 = g12->imag;
335 gamma23 = g23->real;
336 sigma23 = g23->imag;
337
338 is_ident12 = ( gamma12 == one && sigma12 == zero );
339 is_ident23 = ( gamma23 == one && sigma23 == zero );
340
341 if ( !is_ident12 && is_ident23 )
342 {
343 // Apply only to columns 1 and 2.
344
346 &gamma12,
347 &sigma12,
348 a1, rs_A,
349 a2, rs_A );
350 }
351 else if ( is_ident12 && !is_ident23 )
352 {
353 // Apply only to columns 2 and 3.
354
356 &gamma23,
357 &sigma23,
358 a2, rs_A,
359 a3, rs_A );
360 }
361 else if ( !is_ident12 && !is_ident23 )
362 {
363 // Apply to all three columns.
364
366 &gamma12,
367 &sigma12,
368 &gamma23,
369 &sigma23,
370 a1, rs_A,
371 a2, rs_A,
372 a3, rs_A );
373 }
374 }
375 }
376
377 return FLA_SUCCESS;
378}

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var9().

◆ FLA_Apply_G_rf_opt_var1()

FLA_Error FLA_Apply_G_rf_opt_var1 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:215
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:113
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:164
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:267

References FLA_Apply_G_rf_opc_var1(), FLA_Apply_G_rf_opd_var1(), FLA_Apply_G_rf_ops_var1(), FLA_Apply_G_rf_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Apply_G_internal().

◆ FLA_Apply_G_rf_opt_var2()

FLA_Error FLA_Apply_G_rf_opt_var2 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_ops_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var2.c:112
FLA_Error FLA_Apply_G_rf_opd_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var2.c:225
FLA_Error FLA_Apply_G_rf_opz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var2.c:451
FLA_Error FLA_Apply_G_rf_opc_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var2.c:338

References FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_opt_var3()

FLA_Error FLA_Apply_G_rf_opt_var3 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_opd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:565
FLA_Error FLA_Apply_G_rf_ops_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:112
FLA_Error FLA_Apply_G_rf_opz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:1471
FLA_Error FLA_Apply_G_rf_opc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:1018

References FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_opt_var4()

FLA_Error FLA_Apply_G_rf_opt_var4 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var5()

FLA_Error FLA_Apply_G_rf_opt_var5 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var6()

FLA_Error FLA_Apply_G_rf_opt_var6 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_opc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:674
FLA_Error FLA_Apply_G_rf_opd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:393
FLA_Error FLA_Apply_G_rf_ops_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:112
FLA_Error FLA_Apply_G_rf_opz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:955

References FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_opz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_opt_var7()

FLA_Error FLA_Apply_G_rf_opt_var7 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var8()

FLA_Error FLA_Apply_G_rf_opt_var8 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var9()

FLA_Error FLA_Apply_G_rf_opt_var9 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_opz_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var9.c:914
FLA_Error FLA_Apply_G_rf_ops_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var9.c:113
FLA_Error FLA_Apply_G_rf_opd_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var9.c:380
FLA_Error FLA_Apply_G_rf_opc_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var9.c:647

References FLA_Apply_G_rf_opc_var9(), FLA_Apply_G_rf_opd_var9(), FLA_Apply_G_rf_ops_var9(), FLA_Apply_G_rf_opz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_opz_var1()

FLA_Error FLA_Apply_G_rf_opz_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
272{
273 double one = bl1_d1();
274 double zero = bl1_d0();
275 int nG_app = n_A - 1;
276 int l, j;
277 double gamma;
278 double sigma;
279 dcomplex* a1;
280 dcomplex* a2;
281 dcomplex* g1;
282 dcomplex* g11;
283
284 g1 = buff_G;
285
286 for ( l = 0; l < k_G; ++l )
287 {
288 a1 = buff_A;
289 a2 = buff_A + cs_A;
290 g11 = g1;
291
292 for ( j = 0; j < nG_app; ++j )
293 {
294 gamma = g11->real;
295 sigma = g11->imag;
296
297 // Skip the current iteration if the rotation is identity.
298 if ( gamma != one || sigma != zero )
299 {
301 &gamma,
302 &sigma,
303 a1, rs_A,
304 a2, rs_A );
305 }
306
307 a1 += cs_A;
308 a2 += cs_A;
309 g11 += rs_G;
310 }
311
312 g1 += cs_G;
313 }
314
315 return FLA_SUCCESS;
316}

References bl1_d0(), bl1_d1(), i, and dcomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_opt_var1(), FLA_Apply_G_rf_opz_var2(), FLA_Apply_G_rf_opz_var3(), FLA_Apply_G_rf_opz_var6(), and FLA_Apply_G_rf_opz_var9().

◆ FLA_Apply_G_rf_opz_var2()

FLA_Error FLA_Apply_G_rf_opz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
456{
457 double one = bl1_d1();
458 double zero = bl1_d0();
459 double gamma;
460 double sigma;
461 dcomplex* a1;
462 dcomplex* a2;
463 dcomplex* g11;
464 int j, g, k;
465 int nG, nG_app;
466 int k_minus_1;
467
468 k_minus_1 = k_G - 1;
469 nG = n_A - 1;
470
471 // Use the simple variant for nG < (k - 1) or k == 1.
472 if ( nG < k_minus_1 || k_G == 1 )
473 {
475 m_A,
476 n_A,
477 buff_G, rs_G, cs_G,
478 buff_A, rs_A, cs_A );
479 return FLA_SUCCESS;
480 }
481
482
483 // Start-up phase.
484
485 for ( j = 0; j < k_minus_1; ++j )
486 {
487 nG_app = j + 1;
488
489 for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
490 {
491 g11 = buff_G + (g )*rs_G + (k )*cs_G;
492 a1 = buff_A + (g )*cs_A;
493 a2 = buff_A + (g + 1)*cs_A;
494
495 gamma = g11->real;
496 sigma = g11->imag;
497
498 // Skip the current iteration if the rotation is identity.
499 if ( gamma == one && sigma == zero ) continue;
500
502 &gamma,
503 &sigma,
504 a1, rs_A,
505 a2, rs_A );
506 }
507 }
508
509 // Pipeline stage
510
511 for ( j = k_minus_1; j < nG; ++j )
512 {
513 nG_app = k_G;
514
515 for ( k = 0, g = j; k < nG_app; ++k, --g )
516 {
517 g11 = buff_G + (g )*rs_G + (k )*cs_G;
518 a1 = buff_A + (g )*cs_A;
519 a2 = buff_A + (g + 1)*cs_A;
520
521 gamma = g11->real;
522 sigma = g11->imag;
523
524 // Skip the current iteration if the rotation is identity.
525 if ( gamma == one && sigma == zero ) continue;
526
528 &gamma,
529 &sigma,
530 a1, rs_A,
531 a2, rs_A );
532 }
533 }
534
535 // Shutdown stage
536
537 for ( j = nG - k_minus_1; j < nG; ++j )
538 {
539 nG_app = nG - j;
540
541 for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
542 {
543 g11 = buff_G + (g )*rs_G + (k )*cs_G;
544 a1 = buff_A + (g )*cs_A;
545 a2 = buff_A + (g + 1)*cs_A;
546
547 gamma = g11->real;
548 sigma = g11->imag;
549
550 // Skip the current iteration if the rotation is identity.
551 if ( gamma == one && sigma == zero ) continue;
552
554 &gamma,
555 &sigma,
556 a1, rs_A,
557 a2, rs_A );
558 }
559 }
560
561 return FLA_SUCCESS;
562}
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), and i.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_opz_var3()

FLA_Error FLA_Apply_G_rf_opz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
1476{
1477 double one = bl1_d1();
1478 double zero = bl1_d0();
1479 double gamma23_k1;
1480 double sigma23_k1;
1481 double gamma34_k1;
1482 double sigma34_k1;
1483 double gamma12_k2;
1484 double sigma12_k2;
1485 double gamma23_k2;
1486 double sigma23_k2;
1487 dcomplex* a1;
1488 dcomplex* a2;
1489 dcomplex* a3;
1490 dcomplex* a4;
1495 int i, j, g, k;
1496 int nG, nG_app;
1497 int n_iter;
1498 int n_left;
1499 int k_minus_1;
1500 int n_fuse;
1501 int k_fuse;
1504 int has_ident;
1505
1506 k_minus_1 = k_G - 1;
1507 nG = n_A - 1;
1508 n_fuse = 2;
1509 k_fuse = 2;
1510
1511 // Use the simple variant for nG < 2(k - 1) or k == 1.
1512 if ( nG < 2*k_minus_1 || k_G == 1 )
1513 {
1515 m_A,
1516 n_A,
1517 buff_G, rs_G, cs_G,
1518 buff_A, rs_A, cs_A );
1519 return FLA_SUCCESS;
1520 }
1521
1522
1523 // Start-up phase.
1524
1525 for ( j = -1; j < k_minus_1; j += n_fuse )
1526 {
1527 nG_app = j + 2;
1528 n_iter = nG_app / k_fuse;
1529 n_left = 1;
1530
1531 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1532 {
1533 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1534 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1535 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1536 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1537 a1 = buff_A + (g - 1)*cs_A;
1538 a2 = buff_A + (g )*cs_A;
1539 a3 = buff_A + (g + 1)*cs_A;
1540 a4 = buff_A + (g + 2)*cs_A;
1541
1542 gamma23_k1 = g23_k1->real;
1543 sigma23_k1 = g23_k1->imag;
1544 gamma34_k1 = g34_k1->real;
1545 sigma34_k1 = g34_k1->imag;
1546 gamma12_k2 = g12_k2->real;
1547 sigma12_k2 = g12_k2->imag;
1548 gamma23_k2 = g23_k2->real;
1549 sigma23_k2 = g23_k2->imag;
1550
1557
1558 if ( has_ident )
1559 {
1560 // Apply to pairs of columns as needed.
1561
1562 if ( !is_ident23_k1 )
1564 &gamma23_k1,
1565 &sigma23_k1,
1566 a2, rs_A,
1567 a3, rs_A );
1568
1569 if ( !is_ident34_k1 )
1571 &gamma34_k1,
1572 &sigma34_k1,
1573 a3, rs_A,
1574 a4, rs_A );
1575
1576 if ( !is_ident12_k2 )
1578 &gamma12_k2,
1579 &sigma12_k2,
1580 a1, rs_A,
1581 a2, rs_A );
1582
1583 if ( !is_ident23_k2 )
1585 &gamma23_k2,
1586 &sigma23_k2,
1587 a2, rs_A,
1588 a3, rs_A );
1589 }
1590 else
1591 {
1592 // Apply to all four columns.
1593
1595 &gamma23_k1,
1596 &sigma23_k1,
1597 &gamma34_k1,
1598 &sigma34_k1,
1599 &gamma12_k2,
1600 &sigma12_k2,
1601 &gamma23_k2,
1602 &sigma23_k2,
1603 a1, rs_A,
1604 a2, rs_A,
1605 a3, rs_A,
1606 a4, rs_A );
1607 }
1608 }
1609
1610 if ( n_left == 1 )
1611 {
1612 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1613 a3 = buff_A + (g + 1)*cs_A;
1614 a4 = buff_A + (g + 2)*cs_A;
1615
1616 gamma34_k1 = g34_k1->real;
1617 sigma34_k1 = g34_k1->imag;
1618
1620
1621 if ( !is_ident34_k1 )
1623 &gamma34_k1,
1624 &sigma34_k1,
1625 a3, rs_A,
1626 a4, rs_A );
1627 }
1628 }
1629
1630 // Pipeline stage
1631
1632 for ( ; j < nG - 1; j += n_fuse )
1633 {
1634 nG_app = k_G;
1635 n_iter = nG_app / k_fuse;
1636 n_left = nG_app % k_fuse;
1637
1638 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1639 {
1640 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1641 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1642 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1643 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1644 a1 = buff_A + (g - 1)*cs_A;
1645 a2 = buff_A + (g )*cs_A;
1646 a3 = buff_A + (g + 1)*cs_A;
1647 a4 = buff_A + (g + 2)*cs_A;
1648
1649 gamma23_k1 = g23_k1->real;
1650 sigma23_k1 = g23_k1->imag;
1651 gamma34_k1 = g34_k1->real;
1652 sigma34_k1 = g34_k1->imag;
1653 gamma12_k2 = g12_k2->real;
1654 sigma12_k2 = g12_k2->imag;
1655 gamma23_k2 = g23_k2->real;
1656 sigma23_k2 = g23_k2->imag;
1657
1664
1665 if ( has_ident )
1666 {
1667 // Apply to pairs of columns as needed.
1668
1669 if ( !is_ident23_k1 )
1671 &gamma23_k1,
1672 &sigma23_k1,
1673 a2, rs_A,
1674 a3, rs_A );
1675
1676 if ( !is_ident34_k1 )
1678 &gamma34_k1,
1679 &sigma34_k1,
1680 a3, rs_A,
1681 a4, rs_A );
1682
1683 if ( !is_ident12_k2 )
1685 &gamma12_k2,
1686 &sigma12_k2,
1687 a1, rs_A,
1688 a2, rs_A );
1689
1690 if ( !is_ident23_k2 )
1692 &gamma23_k2,
1693 &sigma23_k2,
1694 a2, rs_A,
1695 a3, rs_A );
1696 }
1697 else
1698 {
1699 // Apply to all four columns.
1700
1702 &gamma23_k1,
1703 &sigma23_k1,
1704 &gamma34_k1,
1705 &sigma34_k1,
1706 &gamma12_k2,
1707 &sigma12_k2,
1708 &gamma23_k2,
1709 &sigma23_k2,
1710 a1, rs_A,
1711 a2, rs_A,
1712 a3, rs_A,
1713 a4, rs_A );
1714 }
1715 }
1716
1717 if ( n_left == 1 )
1718 {
1719 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1720 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1721 a2 = buff_A + (g )*cs_A;
1722 a3 = buff_A + (g + 1)*cs_A;
1723 a4 = buff_A + (g + 2)*cs_A;
1724
1725 gamma23_k1 = g23_k1->real;
1726 sigma23_k1 = g23_k1->imag;
1727 gamma34_k1 = g34_k1->real;
1728 sigma34_k1 = g34_k1->imag;
1729
1732
1733 if ( !is_ident23_k1 && is_ident34_k1 )
1734 {
1736 &gamma23_k1,
1737 &sigma23_k1,
1738 a2, rs_A,
1739 a3, rs_A );
1740 }
1741 else if ( is_ident23_k1 && !is_ident34_k1 )
1742 {
1744 &gamma34_k1,
1745 &sigma34_k1,
1746 a3, rs_A,
1747 a4, rs_A );
1748 }
1749 else
1750 {
1752 &gamma23_k1,
1753 &sigma23_k1,
1754 &gamma34_k1,
1755 &sigma34_k1,
1756 a2, rs_A,
1757 a3, rs_A,
1758 a4, rs_A );
1759 }
1760 }
1761 }
1762
1763 // Shutdown stage
1764
1765 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1766 {
1767 g = nG - 1;
1768 k = j;
1769
1770 //n_left = 1;
1771 //if ( n_left == 1 )
1772 {
1773 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1774 a2 = buff_A + (g )*cs_A;
1775 a3 = buff_A + (g + 1)*cs_A;
1776
1777 gamma23_k1 = g23_k1->real;
1778 sigma23_k1 = g23_k1->imag;
1779
1781
1782 if ( !is_ident23_k1 )
1784 &gamma23_k1,
1785 &sigma23_k1,
1786 a2, rs_A,
1787 a3, rs_A );
1788 ++k;
1789 --g;
1790 }
1791
1792 nG_app = k_minus_1 - j;
1793 n_iter = nG_app / k_fuse;
1794 n_left = nG_app % k_fuse;
1795
1796 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1797 {
1798 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1799 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1800 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1801 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1802 a1 = buff_A + (g - 1)*cs_A;
1803 a2 = buff_A + (g )*cs_A;
1804 a3 = buff_A + (g + 1)*cs_A;
1805 a4 = buff_A + (g + 2)*cs_A;
1806
1807 gamma23_k1 = g23_k1->real;
1808 sigma23_k1 = g23_k1->imag;
1809 gamma34_k1 = g34_k1->real;
1810 sigma34_k1 = g34_k1->imag;
1811 gamma12_k2 = g12_k2->real;
1812 sigma12_k2 = g12_k2->imag;
1813 gamma23_k2 = g23_k2->real;
1814 sigma23_k2 = g23_k2->imag;
1815
1822
1823 if ( has_ident )
1824 {
1825 // Apply to pairs of columns as needed.
1826
1827 if ( !is_ident23_k1 )
1829 &gamma23_k1,
1830 &sigma23_k1,
1831 a2, rs_A,
1832 a3, rs_A );
1833
1834 if ( !is_ident34_k1 )
1836 &gamma34_k1,
1837 &sigma34_k1,
1838 a3, rs_A,
1839 a4, rs_A );
1840
1841 if ( !is_ident12_k2 )
1843 &gamma12_k2,
1844 &sigma12_k2,
1845 a1, rs_A,
1846 a2, rs_A );
1847
1848 if ( !is_ident23_k2 )
1850 &gamma23_k2,
1851 &sigma23_k2,
1852 a2, rs_A,
1853 a3, rs_A );
1854 }
1855 else
1856 {
1857 // Apply to all four columns.
1858
1860 &gamma23_k1,
1861 &sigma23_k1,
1862 &gamma34_k1,
1863 &sigma34_k1,
1864 &gamma12_k2,
1865 &sigma12_k2,
1866 &gamma23_k2,
1867 &sigma23_k2,
1868 a1, rs_A,
1869 a2, rs_A,
1870 a3, rs_A,
1871 a4, rs_A );
1872 }
1873 }
1874
1875 if ( n_left == 1 )
1876 {
1877 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1878 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1879 a2 = buff_A + (g )*cs_A;
1880 a3 = buff_A + (g + 1)*cs_A;
1881 a4 = buff_A + (g + 2)*cs_A;
1882
1883 gamma23_k1 = g23_k1->real;
1884 sigma23_k1 = g23_k1->imag;
1885 gamma34_k1 = g34_k1->real;
1886 sigma34_k1 = g34_k1->imag;
1887
1890
1891 if ( !is_ident23_k1 && is_ident34_k1 )
1892 {
1894 &gamma23_k1,
1895 &sigma23_k1,
1896 a2, rs_A,
1897 a3, rs_A );
1898 }
1899 else if ( is_ident23_k1 && !is_ident34_k1 )
1900 {
1902 &gamma34_k1,
1903 &sigma34_k1,
1904 a3, rs_A,
1905 a4, rs_A );
1906 }
1907 else
1908 {
1910 &gamma23_k1,
1911 &sigma23_k1,
1912 &gamma34_k1,
1913 &sigma34_k1,
1914 a2, rs_A,
1915 a3, rs_A,
1916 a4, rs_A );
1917 }
1918 }
1919 }
1920
1921 return FLA_SUCCESS;
1922}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_blz_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_opz_var4()

FLA_Error FLA_Apply_G_rf_opz_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var5()

FLA_Error FLA_Apply_G_rf_opz_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var6()

FLA_Error FLA_Apply_G_rf_opz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
// Body of FLA_Apply_G_rf_opz_var6 as rendered in this source listing.
//
// Each entry of G is read as a (gamma, sigma) pair: gamma from the entry's
// real part and sigma from its imaginary part. Pairs are consumed two at a
// time (n_fuse = 2) and handed, together with pointers to two or three
// adjacent columns of A, to application routines. A pair for which
// gamma == one and sigma == zero is treated as an identity and skipped.
//
// The work is organised in three phases, following the listing's own
// comments: start-up, pipeline and shutdown. The function always returns
// FLA_SUCCESS.
//
// NOTE(review): several listing lines are absent here (e.g. 978, 987, 1024,
// 1034, 1044). The names of the routines being called and the declarations
// of is_ident12 / is_ident23 are therefore not visible in this listing.
960{
// Reference values for the identity test below.
// NOTE(review): presumably 1.0 and 0.0 — confirm against bl1_d1()/bl1_d0().
961 double one = bl1_d1();
962 double zero = bl1_d0();
963 double gamma12;
964 double sigma12;
965 double gamma23;
966 double sigma23;
967 dcomplex* a1;
968 dcomplex* a2;
969 dcomplex* a3;
970 dcomplex* g12;
971 dcomplex* g23;
972 int i, j, g, k;
973 int nG, nG_app;
974 int n_iter;
975 int n_left;
976 int k_minus_1;
977 int n_fuse;
979
980 k_minus_1 = k_G - 1;
981 nG = n_A - 1;
// Number of G entries handled per inner-loop iteration.
982 n_fuse = 2;
983
984 // Use the simple variant for nG < (k - 1) or k == 1.
985 if ( nG < k_minus_1 || k_G == 1 )
986 {
// The head of this call (listing line 987) is missing from the listing;
// only its trailing arguments are shown.
988 m_A,
989 n_A,
990 buff_G, rs_G, cs_G,
991 buff_A, rs_A, cs_A );
992 return FLA_SUCCESS;
993 }
994
995
996 // Start-up phase.
997
// Pass j processes nG_app = j + 1 entries of G: n_iter fused pairs, plus
// one leftover entry when nG_app is odd.
998 for ( j = 0; j < k_minus_1; ++j )
999 {
1000 nG_app = j + 1;
1001 n_iter = nG_app / n_fuse;
1002 n_left = nG_app % n_fuse;
1003
// k advances by n_fuse through the columns of G while g retreats by
// n_fuse through its rows.
1004 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1005 {
// g12 is G(g-1, k+1) and g23 is G(g, k); a1..a3 point at columns
// g-1, g and g+1 of A.
1006 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1007 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1008 a1 = buff_A + (g - 1)*cs_A;
1009 a2 = buff_A + (g )*cs_A;
1010 a3 = buff_A + (g + 1)*cs_A;
1011
1012 gamma12 = g12->real;
1013 sigma12 = g12->imag;
1014 gamma23 = g23->real;
1015 sigma23 = g23->imag;
1016
1017 is_ident12 = ( gamma12 == one && sigma12 == zero );
1018 is_ident23 = ( gamma23 == one && sigma23 == zero );
1019
// If both entries are identities none of the branches below is taken
// and A is left untouched for this pair.
1020 if ( !is_ident12 && is_ident23 )
1021 {
1022 // Apply only to columns 1 and 2.
1023
1025 &gamma12,
1026 &sigma12,
1027 a1, rs_A,
1028 a2, rs_A );
1029 }
1030 else if ( is_ident12 && !is_ident23 )
1031 {
1032 // Apply only to columns 2 and 3.
1033
1035 &gamma23,
1036 &sigma23,
1037 a2, rs_A,
1038 a3, rs_A );
1039 }
1040 else if ( !is_ident12 && !is_ident23 )
1041 {
1042 // Apply to all three columns.
1043
1045 &gamma12,
1046 &sigma12,
1047 &gamma23,
1048 &sigma23,
1049 a1, rs_A,
1050 a2, rs_A,
1051 a3, rs_A );
1052 }
1053 }
1054 //for ( k = 0; k < n_left; k += 1, g -= 1 )
// Leftover entry: reuses the k and g values left behind by the loop above.
1055 if ( n_left == 1 )
1056 {
1057 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1058 a2 = buff_A + (g )*cs_A;
1059 a3 = buff_A + (g + 1)*cs_A;
1060
1061 gamma23 = g23->real;
1062 sigma23 = g23->imag;
1063
1064 is_ident23 = ( gamma23 == one && sigma23 == zero );
1065
1066 if ( !is_ident23 )
1068 &gamma23,
1069 &sigma23,
1070 a2, rs_A,
1071 a3, rs_A );
1072 }
1073 }
1074
1075 // Pipeline stage
1076
// Every pass processes a full set of k_G entries, starting at row g = j of G.
1077 for ( j = k_minus_1; j < nG; ++j )
1078 {
1079 nG_app = k_G;
1080 n_iter = nG_app / n_fuse;
1081 n_left = nG_app % n_fuse;
1082
1083 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1084 {
1085 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1086 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1087 a1 = buff_A + (g - 1)*cs_A;
1088 a2 = buff_A + (g )*cs_A;
1089 a3 = buff_A + (g + 1)*cs_A;
1090
1091 gamma12 = g12->real;
1092 sigma12 = g12->imag;
1093 gamma23 = g23->real;
1094 sigma23 = g23->imag;
1095
1096 is_ident12 = ( gamma12 == one && sigma12 == zero );
1097 is_ident23 = ( gamma23 == one && sigma23 == zero );
1098
1099 if ( !is_ident12 && is_ident23 )
1100 {
1101 // Apply only to columns 1 and 2.
1102
1104 &gamma12,
1105 &sigma12,
1106 a1, rs_A,
1107 a2, rs_A );
1108 }
1109 else if ( is_ident12 && !is_ident23 )
1110 {
1111 // Apply only to columns 2 and 3.
1112
1114 &gamma23,
1115 &sigma23,
1116 a2, rs_A,
1117 a3, rs_A );
1118 }
1119 else if ( !is_ident12 && !is_ident23 )
1120 {
1121 // Apply to all three columns.
1122
1124 &gamma12,
1125 &sigma12,
1126 &gamma23,
1127 &sigma23,
1128 a1, rs_A,
1129 a2, rs_A,
1130 a3, rs_A );
1131 }
1132 }
1133 //for ( k = 0; k < n_left; k += 1, g -= 1 )
// Leftover entry when k_G is odd.
1134 if ( n_left == 1 )
1135 {
1136 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1137 a2 = buff_A + (g )*cs_A;
1138 a3 = buff_A + (g + 1)*cs_A;
1139
1140 gamma23 = g23->real;
1141 sigma23 = g23->imag;
1142
1143 is_ident23 = ( gamma23 == one && sigma23 == zero );
1144
1145 if ( !is_ident23 )
1147 &gamma23,
1148 &sigma23,
1149 a2, rs_A,
1150 a3, rs_A );
1151 }
1152 }
1153
1154 // Shutdown stage
1155
// Pass j processes the remaining k_G - j entries, starting at column k = j
// and row g = nG - 1 of G.
1156 for ( j = 1; j < k_G; ++j )
1157 {
1158 nG_app = k_G - j;
1159 n_iter = nG_app / n_fuse;
1160 n_left = nG_app % n_fuse;
1161
1162 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1163 {
1164 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1165 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1166 a1 = buff_A + (g - 1)*cs_A;
1167 a2 = buff_A + (g )*cs_A;
1168 a3 = buff_A + (g + 1)*cs_A;
1169
1170 gamma12 = g12->real;
1171 sigma12 = g12->imag;
1172 gamma23 = g23->real;
1173 sigma23 = g23->imag;
1174
1175 is_ident12 = ( gamma12 == one && sigma12 == zero );
1176 is_ident23 = ( gamma23 == one && sigma23 == zero );
1177
1178 if ( !is_ident12 && is_ident23 )
1179 {
1180 // Apply only to columns 1 and 2.
1181
1183 &gamma12,
1184 &sigma12,
1185 a1, rs_A,
1186 a2, rs_A );
1187 }
1188 else if ( is_ident12 && !is_ident23 )
1189 {
1190 // Apply only to columns 2 and 3.
1191
1193 &gamma23,
1194 &sigma23,
1195 a2, rs_A,
1196 a3, rs_A );
1197 }
1198 else if ( !is_ident12 && !is_ident23 )
1199 {
1200 // Apply to all three columns.
1201
1203 &gamma12,
1204 &sigma12,
1205 &gamma23,
1206 &sigma23,
1207 a1, rs_A,
1208 a2, rs_A,
1209 a3, rs_A );
1210 }
1211 }
1212 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
// Leftover entry when k_G - j is odd.
1213 if ( n_left == 1 )
1214 {
1215 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1216 a2 = buff_A + (g )*cs_A;
1217 a3 = buff_A + (g + 1)*cs_A;
1218
1219 gamma23 = g23->real;
1220 sigma23 = g23->imag;
1221
1222 is_ident23 = ( gamma23 == one && sigma23 == zero );
1223
1224 if ( !is_ident23 )
1226 &gamma23,
1227 &sigma23,
1228 a2, rs_A,
1229 a3, rs_A );
1230 }
1231 }
1232
1233 return FLA_SUCCESS;
1234}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opz_var7()

FLA_Error FLA_Apply_G_rf_opz_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var8()

FLA_Error FLA_Apply_G_rf_opz_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var9()

FLA_Error FLA_Apply_G_rf_opz_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
// Body of FLA_Apply_G_rf_opz_var9 as rendered in this source listing.
//
// Each entry of G is read as a (gamma, sigma) pair: gamma from the entry's
// real part and sigma from its imaginary part. In the fused loops two
// entries from the same column k of G, at adjacent rows g and g + 1, are
// handed together with pointers to columns g, g + 1 and g + 2 of A to
// application routines. A pair for which gamma == one and sigma == zero is
// treated as an identity and skipped.
//
// The outer loops step j by n_fuse = 2 and follow the listing's own phase
// comments: start-up, pipeline and shutdown. The function always returns
// FLA_SUCCESS.
//
// NOTE(review): several listing lines are absent here (e.g. 937, 946, 983,
// 993, 1003). The names of the routines being called and the declarations
// of is_ident12 / is_ident23 are therefore not visible in this listing.
919{
// Reference values for the identity test below.
// NOTE(review): presumably 1.0 and 0.0 — confirm against bl1_d1()/bl1_d0().
920 double one = bl1_d1();
921 double zero = bl1_d0();
922 double gamma12;
923 double sigma12;
924 double gamma23;
925 double sigma23;
926 dcomplex* a1;
927 dcomplex* a2;
928 dcomplex* a3;
929 dcomplex* g12;
930 dcomplex* g23;
931 int i, j, g, k;
932 int nG, nG_app;
933 int n_iter;
934 int n_left;
935 int k_minus_1;
936 int n_fuse;
938
939 k_minus_1 = k_G - 1;
940 nG = n_A - 1;
// Step size of the outer j loops.
941 n_fuse = 2;
942
943 // Use the simple variant for nG < 2*(k - 1) or k == 1.
944 if ( nG < 2*k_minus_1 || k_G == 1 )
945 {
// The head of this call (listing line 946) is missing from the listing;
// only its trailing arguments are shown.
947 m_A,
948 n_A,
949 buff_G, rs_G, cs_G,
950 buff_A, rs_A, cs_A );
951 return FLA_SUCCESS;
952 }
953
954
955 // Start-up phase.
956
// j starts at -1, so the first pass has n_iter = 0 and performs only the
// single-entry step after the inner loop.
957 for ( j = -1; j < k_minus_1; j += n_fuse )
958 {
959 nG_app = j + 1;
960 n_iter = nG_app;
961 n_left = 1;
962
// k advances by one column of G per iteration while g retreats by one row.
963 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
964 {
// g12 is G(g, k) and g23 is G(g+1, k); a1..a3 point at columns
// g, g+1 and g+2 of A.
965 g12 = buff_G + (g )*rs_G + (k )*cs_G;
966 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
967 a1 = buff_A + (g )*cs_A;
968 a2 = buff_A + (g + 1)*cs_A;
969 a3 = buff_A + (g + 2)*cs_A;
970
971 gamma12 = g12->real;
972 sigma12 = g12->imag;
973 gamma23 = g23->real;
974 sigma23 = g23->imag;
975
976 is_ident12 = ( gamma12 == one && sigma12 == zero );
977 is_ident23 = ( gamma23 == one && sigma23 == zero );
978
// If both entries are identities none of the branches below is taken
// and A is left untouched for this pair.
979 if ( !is_ident12 && is_ident23 )
980 {
981 // Apply only to columns 1 and 2.
982
984 &gamma12,
985 &sigma12,
986 a1, rs_A,
987 a2, rs_A );
988 }
989 else if ( is_ident12 && !is_ident23 )
990 {
991 // Apply only to columns 2 and 3.
992
994 &gamma23,
995 &sigma23,
996 a2, rs_A,
997 a3, rs_A );
998 }
999 else if ( !is_ident12 && !is_ident23 )
1000 {
1001 // Apply to all three columns.
1002
1004 &gamma12,
1005 &sigma12,
1006 &gamma23,
1007 &sigma23,
1008 a1, rs_A,
1009 a2, rs_A,
1010 a3, rs_A );
1011 }
1012 }
1013
// n_left was set to 1 above, so this single-entry step runs on every
// start-up pass. It reuses the k and g values left by the inner loop.
1014 if ( n_left == 1 )
1015 {
1016 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1017 a2 = buff_A + (g + 1)*cs_A;
1018 a3 = buff_A + (g + 2)*cs_A;
1019
1020 gamma23 = g23->real;
1021 sigma23 = g23->imag;
1022
1023 is_ident23 = ( gamma23 == one && sigma23 == zero );
1024
1025 if ( !is_ident23 )
1027 &gamma23,
1028 &sigma23,
1029 a2, rs_A,
1030 a3, rs_A );
1031 }
1032 }
1033
1034 // Pipeline stage
1035
// j is deliberately not reinitialised: this loop continues from the value
// at which the start-up loop stopped.
1036 for ( ; j < nG - 1; j += n_fuse )
1037 {
1038 nG_app = k_G;
1039 n_iter = nG_app;
// n_left is assigned here but not read anywhere inside this loop.
1040 n_left = 0;
1041
1042 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
1043 {
1044 g12 = buff_G + (g )*rs_G + (k )*cs_G;
1045 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1046 a1 = buff_A + (g )*cs_A;
1047 a2 = buff_A + (g + 1)*cs_A;
1048 a3 = buff_A + (g + 2)*cs_A;
1049
1050 gamma12 = g12->real;
1051 sigma12 = g12->imag;
1052 gamma23 = g23->real;
1053 sigma23 = g23->imag;
1054
1055 is_ident12 = ( gamma12 == one && sigma12 == zero );
1056 is_ident23 = ( gamma23 == one && sigma23 == zero );
1057
1058 if ( !is_ident12 && is_ident23 )
1059 {
1060 // Apply only to columns 1 and 2.
1061
1063 &gamma12,
1064 &sigma12,
1065 a1, rs_A,
1066 a2, rs_A );
1067 }
1068 else if ( is_ident12 && !is_ident23 )
1069 {
1070 // Apply only to columns 2 and 3.
1071
1073 &gamma23,
1074 &sigma23,
1075 a2, rs_A,
1076 a3, rs_A );
1077 }
1078 else if ( !is_ident12 && !is_ident23 )
1079 {
1080 // Apply to all three columns.
1081
1083 &gamma12,
1084 &sigma12,
1085 &gamma23,
1086 &sigma23,
1087 a1, rs_A,
1088 a2, rs_A,
1089 a3, rs_A );
1090 }
1091 }
1092 }
1093
1094 // Shutdown stage
1095
// Each pass starts at column k = j and row g = nG - 1 of G: first a
// single-entry step, then k_minus_1 - j fused steps.
1096 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1097 {
1098 g = nG - 1;
1099 k = j;
1100
// n_left is set to 1 immediately before the test, so the block below
// always executes.
1101 n_left = 1;
1102 if ( n_left == 1 )
1103 {
1104 g12 = buff_G + (g )*rs_G + (k )*cs_G;
1105 a1 = buff_A + (g )*cs_A;
1106 a2 = buff_A + (g + 1)*cs_A;
1107
1108 gamma12 = g12->real;
1109 sigma12 = g12->imag;
1110
1111 is_ident12 = ( gamma12 == one && sigma12 == zero );
1112
// The unbraced if guards only the call that follows; k and g are
// advanced afterwards whether or not the entry was an identity.
1113 if ( !is_ident12 )
1115 &gamma12,
1116 &sigma12,
1117 a1, rs_A,
1118 a2, rs_A );
1119 ++k;
1120 --g;
1121 }
1122
1123 nG_app = k_minus_1 - j;
1124 n_iter = nG_app;
1125
// k and g continue from the values left by the single-entry step above.
1126 for ( i = 0; i < n_iter; ++i, ++k, --g )
1127 {
1128 g12 = buff_G + (g )*rs_G + (k )*cs_G;
1129 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1130 a1 = buff_A + (g )*cs_A;
1131 a2 = buff_A + (g + 1)*cs_A;
1132 a3 = buff_A + (g + 2)*cs_A;
1133
1134 gamma12 = g12->real;
1135 sigma12 = g12->imag;
1136 gamma23 = g23->real;
1137 sigma23 = g23->imag;
1138
1139 is_ident12 = ( gamma12 == one && sigma12 == zero );
1140 is_ident23 = ( gamma23 == one && sigma23 == zero );
1141
1142 if ( !is_ident12 && is_ident23 )
1143 {
1144 // Apply only to columns 1 and 2.
1145
1147 &gamma12,
1148 &sigma12,
1149 a1, rs_A,
1150 a2, rs_A );
1151 }
1152 else if ( is_ident12 && !is_ident23 )
1153 {
1154 // Apply only to columns 2 and 3.
1155
1157 &gamma23,
1158 &sigma23,
1159 a2, rs_A,
1160 a3, rs_A );
1161 }
1162 else if ( !is_ident12 && !is_ident23 )
1163 {
1164 // Apply to all three columns.
1165
1167 &gamma12,
1168 &sigma12,
1169 &gamma23,
1170 &sigma23,
1171 a1, rs_A,
1172 a2, rs_A,
1173 a3, rs_A );
1174 }
1175 }
1176 }
1177
1178 return FLA_SUCCESS;
1179}

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var9().