libflame revision_anchor
Functions
FLA_Bidiag_UT_u.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Bidiag_UT_u_unb_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blf_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blf_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blf_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_unb_var5 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_blk_var5 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_unb_var5 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var1 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var1 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var1 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var1 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var4 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_opt_var5 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var5 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var5 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var5 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var5 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var5 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_ofu_var4 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Fused_Gerc2_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A)
 
FLA_Error FLA_Fused_Gerc2_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 (FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 (FLA_Obj alpha, FLA_Obj tau, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A, FLA_Obj up, FLA_Obj a, FLA_Obj w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UYx_ZVx_opt_var1 (FLA_Obj delta, FLA_Obj a, FLA_Obj U, FLA_Obj Y, FLA_Obj Z, FLA_Obj V, FLA_Obj A, FLA_Obj temp, FLA_Obj t, FLA_Obj w, FLA_Obj al)
 
FLA_Error FLA_Fused_UYx_ZVx_ops_var1 (int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opd_var1 (int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opc_var1 (int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
 
FLA_Error FLA_Fused_UYx_ZVx_opz_var1 (int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
 

Function Documentation

◆ FLA_Bidiag_UT_u_blf_var2()

FLA_Error FLA_Bidiag_UT_u_blf_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TUL, TUR, TU0, TU1, TU2;
19 FLA_Obj TVL, TVR, TV0, TV1, TV2;
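20
21 FLA_Obj TU1_tl;
22 FLA_Obj TV1_tl;
23 FLA_Obj none, none2, none3;
24 dim_t b_alg, b;
25
26 b_alg = FLA_Obj_length( TU );
27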
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32
33 while ( FLA_Obj_min_dim( ABR ) > 0 )
34 {
35 b = min( FLA_Obj_min_dim( ABR ), b_alg );
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38 /* ************* */ /* ******************** */
39 &A10, /**/ &A11, &A12,
40 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41 b, b, FLA_BR );
42 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43 b, FLA_RIGHT );
44 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45 b, FLA_RIGHT );
46
47 /*------------------------------------------------------------*/
48
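49 FLA_Part_2x2( TU1, &TU1_tl, &none,
50 &none2, &none3, b, b, FLA_TL );
51
52 FLA_Part_2x2( TV1, &TV1_tl, &none,
53 &none2, &none3, b, b, FLA_TL );
54
55 // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1, TV1, b );
56 //FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1_tl, TV1_tl );
57 FLA_Bidiag_UT_u_step_ofu_var2( ABR, TU1_tl, TV1_tl );
58 //FLA_Bidiag_UT_u_step_opt_var2( ABR, TU1_tl, TV1_tl );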
59
60 /*------------------------------------------------------------*/
61
62 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63 A10, A11, /**/ A12,
64 /* ************** */ /* ****************** */
65 &ABL, /**/ &ABR, A20, A21, /**/ A22,
66 FLA_TL );
67 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68 FLA_LEFT );
69 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70 FLA_LEFT );
71 }
72
73 return FLA_SUCCESS;
74}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var2.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition FLA_Query.c:153
unsigned long dim_t
Definition FLA_type_defs.h:71
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Bidiag_UT_u_step_ofu_var2(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_blf_var3()

FLA_Error FLA_Bidiag_UT_u_blf_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TUL, TUR, TU0, TU1, TU2;
19 FLA_Obj TVL, TVR, TV0, TV1, TV2;
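20
21 FLA_Obj TU1_tl;
22 FLA_Obj TV1_tl;
23 FLA_Obj none, none2, none3;
24 dim_t b_alg, b;
25
26 b_alg = FLA_Obj_length( TU );
27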
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32
33 while ( FLA_Obj_min_dim( ABR ) > 0 )
34 {
35 b = min( FLA_Obj_min_dim( ABR ), b_alg );
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38 /* ************* */ /* ******************** */
39 &A10, /**/ &A11, &A12,
40 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41 b, b, FLA_BR );
42 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43 b, FLA_RIGHT );
44 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45 b, FLA_RIGHT );
46
47 /*------------------------------------------------------------*/
48
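49 FLA_Part_2x2( TU1, &TU1_tl, &none,
50 &none2, &none3, b, b, FLA_TL );
51
52 FLA_Part_2x2( TV1, &TV1_tl, &none,
53 &none2, &none3, b, b, FLA_TL );
54
55 // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1, TV1, b );
56 //FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1_tl, TV1_tl );
57 FLA_Bidiag_UT_u_step_ofu_var3( ABR, TU1_tl, TV1_tl );
58 //FLA_Bidiag_UT_u_step_opt_var3( ABR, TU1_tl, TV1_tl );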
59
60 /*------------------------------------------------------------*/
61
62 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63 A10, A11, /**/ A12,
64 /* ************** */ /* ****************** */
65 &ABL, /**/ &ABR, A20, A21, /**/ A22,
66 FLA_TL );
67 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68 FLA_LEFT );
69 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70 FLA_LEFT );
71 }
72
73 return FLA_SUCCESS;
74}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var3.c:18

References FLA_Bidiag_UT_u_step_ofu_var3(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_blf_var4()

FLA_Error FLA_Bidiag_UT_u_blf_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj VT, V0,
22 VB, V1,
23 V2;
24 FLA_Obj YT, Y0,
25 YB, Y1,
26 Y2;
27 FLA_Obj ZT, Z0,
28 ZB, Z1,
29 Z2;
30 FLA_Obj TUL, TUR, TU0, TU1, TU2;
31 FLA_Obj TVL, TVR, TV0, TV1, TV2;
32
33 FLA_Obj U, V, Y, Z;
43 VB_bl;
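44 FLA_Datatype datatype_A;
45 dim_t m_A, n_A;
46 dim_t b_alg, b;
47
48 b_alg = FLA_Obj_length( TU );
49
50 datatype_A = FLA_Obj_datatype( A );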
51 m_A = FLA_Obj_length( A );
52 n_A = FLA_Obj_width( A );
53
54 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
55 FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
56 FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
57 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
58
59 FLA_Part_2x2( A, &ATL, &ATR,
60 &ABL, &ABR, 0, 0, FLA_TL );
61 FLA_Part_2x1( U, &UT,
62 &UB, 0, FLA_TOP );
63 FLA_Part_2x1( V, &VT,
64 &VB, 0, FLA_TOP );
65 FLA_Part_2x1( Y, &YT,
66 &YB, 0, FLA_TOP );
67 FLA_Part_2x1( Z, &ZT,
68 &ZB, 0, FLA_TOP );
69 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
70 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
71
72 while ( FLA_Obj_min_dim( ABR ) > 0 )
73 {
74 b = min( FLA_Obj_min_dim( ABR ), b_alg );
75
76 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
77 /* ************* */ /* ******************** */
78 &A10, /**/ &A11, &A12,
79 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
80 b, b, FLA_BR );
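81 FLA_Repart_2x1_to_3x1( UT, &U0,
82 /* ** */ /* ** */
83 &U1,
84 UB, &U2, b, FLA_BOTTOM );
85 FLA_Repart_2x1_to_3x1( VT, &V0,
86 /* ** */ /* ** */
87 &V1,
88 VB, &V2, b, FLA_BOTTOM );
89 FLA_Repart_2x1_to_3x1( YT, &Y0,
90 /* ** */ /* ** */
91 &Y1,
92 YB, &Y2, b, FLA_BOTTOM );
93 FLA_Repart_2x1_to_3x1( ZT, &Z0,
94 /* ** */ /* ** */
95 &Z1,
96 ZB, &Z2, b, FLA_BOTTOM );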
97 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
98 b, FLA_RIGHT );
99 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
100 b, FLA_RIGHT );
101
102 /*------------------------------------------------------------*/
103
105 &none2, &none3, b, b, FLA_TL );
106
108 &none2, &none3, b, b, FLA_TL );
109
112 &none, b, FLA_TOP );
113
118
120 &U2_l, b, FLA_TOP );
122 &V2_l, b, FLA_TOP );
124 &Y2_l, b, FLA_TOP );
126 &Z2_l, b, FLA_TOP );
127
128 // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var4( ABR, TU1, TV1, b );
129 //FLA_Bidiag_UT_u_step_unb_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
131 //FLA_Bidiag_UT_u_step_opt_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
132
133 if ( FLA_Obj_length( A22 ) > 0 )
134 {
135 // Build UB from ABR, with explicit unit subdiagonal and zeros.
136 FLA_Copy( ABR_l, UB_l );
138
139 // Build VB from ABR, with explicit unit subdiagonal and zeros.
142 &VB_bl, 1, FLA_TOP );
145
146 // A22 = A22 - U2 * Y2' - Z2 * V2';
151 }
152
153 /*------------------------------------------------------------*/
154
155 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
156 A10, A11, /**/ A12,
157 /* ************** */ /* ****************** */
158 &ABL, /**/ &ABR, A20, A21, /**/ A22,
159 FLA_TL );
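160 FLA_Cont_with_3x1_to_2x1( &UT, U0,
161 U1,
162 /* ** */ /* ** */
163 &UB, U2, FLA_TOP );
164 FLA_Cont_with_3x1_to_2x1( &VT, V0,
165 V1,
166 /* ** */ /* ** */
167 &VB, V2, FLA_TOP );
168 FLA_Cont_with_3x1_to_2x1( &YT, Y0,
169 Y1,
170 /* ** */ /* ** */
171 &YB, Y2, FLA_TOP );
172 FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
173 Z1,
174 /* ** */ /* ** */
175 &ZB, Z2, FLA_TOP );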
176 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
177 FLA_LEFT );
178 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
179 FLA_LEFT );
180 }
181
182 FLA_Obj_free( &U );
183 FLA_Obj_free( &V );
184 FLA_Obj_free( &Y );
185 FLA_Obj_free( &Z );
186
187 return FLA_SUCCESS;
188}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var4.c:35
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy.c:15
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition FLA_Copyt.c:15
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition FLA_Gemm_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition FLA_Obj.c:55
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition FLA_Triangularize.c:13
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Set.c:13

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_blk_var1()

FLA_Error FLA_Bidiag_UT_u_blk_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TUL, TUR, TU0, TU1, TU2;
19 FLA_Obj TVL, TVR, TV0, TV1, TV2;
20
24 dim_t b_alg, b;
25
27
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32
33 while ( FLA_Obj_min_dim( ABR ) > 0 )
34 {
35 b = min( FLA_Obj_min_dim( ABR ), b_alg );
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38 /* ************* */ /* ******************** */
39 &A10, /**/ &A11, &A12,
40 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41 b, b, FLA_BR );
42 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43 b, FLA_RIGHT );
44 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45 b, FLA_RIGHT );
46
47 /*------------------------------------------------------------*/
48
50 &none2, &none3, b, b, FLA_TL );
51
53 &none2, &none3, b, b, FLA_TL );
54
55 // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var1( ABR, TU1, TV1, b );
56 //FLA_Bidiag_UT_u_step_unb_var1( ABR, TU1_tl, TV1_tl );
58
59 /*------------------------------------------------------------*/
60
61 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
62 A10, A11, /**/ A12,
63 /* ************** */ /* ****************** */
64 &ABL, /**/ &ABR, A20, A21, /**/ A22,
65 FLA_TL );
66 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
67 FLA_LEFT );
68 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
69 FLA_LEFT );
70 }
71
72 return FLA_SUCCESS;
73}
FLA_Error FLA_Bidiag_UT_u_step_opt_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var1.c:18

References FLA_Bidiag_UT_u_step_opt_var1(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_blk_var2()

FLA_Error FLA_Bidiag_UT_u_blk_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TUL, TUR, TU0, TU1, TU2;
19 FLA_Obj TVL, TVR, TV0, TV1, TV2;
20
24 dim_t b_alg, b;
25
27
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32
33 while ( FLA_Obj_min_dim( ABR ) > 0 )
34 {
35 b = min( FLA_Obj_min_dim( ABR ), b_alg );
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38 /* ************* */ /* ******************** */
39 &A10, /**/ &A11, &A12,
40 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41 b, b, FLA_BR );
42 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43 b, FLA_RIGHT );
44 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45 b, FLA_RIGHT );
46
47 /*------------------------------------------------------------*/
48
50 &none2, &none3, b, b, FLA_TL );
51
53 &none2, &none3, b, b, FLA_TL );
54
55 // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1, TV1, b );
56 //FLA_Bidiag_UT_u_step_unb_var2( ABR, TU1_tl, TV1_tl );
57 //FLA_Bidiag_UT_u_step_ofu_var2( ABR, TU1_tl, TV1_tl );
59
60 /*------------------------------------------------------------*/
61
62 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63 A10, A11, /**/ A12,
64 /* ************** */ /* ****************** */
65 &ABL, /**/ &ABR, A20, A21, /**/ A22,
66 FLA_TL );
67 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68 FLA_LEFT );
69 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70 FLA_LEFT );
71 }
72
73 return FLA_SUCCESS;
74}
FLA_Error FLA_Bidiag_UT_u_step_opt_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var2.c:18

References FLA_Bidiag_UT_u_step_opt_var2(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_blk_var3()

FLA_Error FLA_Bidiag_UT_u_blk_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TUL, TUR, TU0, TU1, TU2;
19 FLA_Obj TVL, TVR, TV0, TV1, TV2;
20
24 dim_t b_alg, b;
25
27
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
31 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
32
33 while ( FLA_Obj_min_dim( ABR ) > 0 )
34 {
35 b = min( FLA_Obj_min_dim( ABR ), b_alg );
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38 /* ************* */ /* ******************** */
39 &A10, /**/ &A11, &A12,
40 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41 b, b, FLA_BR );
42 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
43 b, FLA_RIGHT );
44 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
45 b, FLA_RIGHT );
46
47 /*------------------------------------------------------------*/
48
50 &none2, &none3, b, b, FLA_TL );
51
53 &none2, &none3, b, b, FLA_TL );
54
55 // [ ABR, T1 ] = FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1, TV1, b );
56 //FLA_Bidiag_UT_u_step_unb_var3( ABR, TU1_tl, TV1_tl );
57 //FLA_Bidiag_UT_u_step_ofu_var3( ABR, TU1_tl, TV1_tl );
59
60 /*------------------------------------------------------------*/
61
62 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63 A10, A11, /**/ A12,
64 /* ************** */ /* ****************** */
65 &ABL, /**/ &ABR, A20, A21, /**/ A22,
66 FLA_TL );
67 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
68 FLA_LEFT );
69 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
70 FLA_LEFT );
71 }
72
73 return FLA_SUCCESS;
74}
FLA_Error FLA_Bidiag_UT_u_step_opt_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var3.c:18

References FLA_Bidiag_UT_u_step_opt_var3(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_blk_var4()

FLA_Error FLA_Bidiag_UT_u_blk_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj VT, V0,
22 VB, V1,
23 V2;
24 FLA_Obj YT, Y0,
25 YB, Y1,
26 Y2;
27 FLA_Obj ZT, Z0,
28 ZB, Z1,
29 Z2;
32
33 FLA_Obj U, V, Y, Z;
43 VB_bl;
45 dim_t m_A, n_A;
46 dim_t b_alg, b;
47
49
51 m_A = FLA_Obj_length( A );
52 n_A = FLA_Obj_width( A );
53
54 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
55 FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
56 FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
57 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
58
59 FLA_Part_2x2( A, &ATL, &ATR,
60 &ABL, &ABR, 0, 0, FLA_TL );
61 FLA_Part_2x1( U, &UT,
62 &UB, 0, FLA_TOP );
63 FLA_Part_2x1( V, &VT,
64 &VB, 0, FLA_TOP );
65 FLA_Part_2x1( Y, &YT,
66 &YB, 0, FLA_TOP );
67 FLA_Part_2x1( Z, &ZT,
68 &ZB, 0, FLA_TOP );
69 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
70 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
71
72 while ( FLA_Obj_min_dim( ABR ) > 0 )
73 {
74 b = min( FLA_Obj_min_dim( ABR ), b_alg );
75
76 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
77 /* ************* */ /* ******************** */
78 &A10, /**/ &A11, &A12,
79 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
80 b, b, FLA_BR );
82 /* ** */ /* ** */
83 &U1,
84 UB, &U2, b, FLA_BOTTOM );
86 /* ** */ /* ** */
87 &V1,
88 VB, &V2, b, FLA_BOTTOM );
90 /* ** */ /* ** */
91 &Y1,
92 YB, &Y2, b, FLA_BOTTOM );
94 /* ** */ /* ** */
95 &Z1,
96 ZB, &Z2, b, FLA_BOTTOM );
97 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
98 b, FLA_RIGHT );
99 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
100 b, FLA_RIGHT );
101
102 /*------------------------------------------------------------*/
103
105 &none2, &none3, b, b, FLA_TL );
106
108 &none2, &none3, b, b, FLA_TL );
109
112 &none, b, FLA_TOP );
113
118
120 &U2_l, b, FLA_TOP );
122 &V2_l, b, FLA_TOP );
124 &Y2_l, b, FLA_TOP );
126 &Z2_l, b, FLA_TOP );
127
128 // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var4( ABR, TU1, TV1, b );
129 //FLA_Bidiag_UT_u_step_unb_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
130 //FLA_Bidiag_UT_u_step_ofu_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
132
133 if ( FLA_Obj_length( A22 ) > 0 )
134 {
135 // Build UB from ABR, with explicit unit subdiagonal and zeros.
136 FLA_Copy( ABR_l, UB_l );
138
139 // Build VB from ABR, with explicit unit subdiagonal and zeros.
142 &VB_bl, 1, FLA_TOP );
145
146 // A22 = A22 - U2 * Y2' - Z2 * V2';
151 }
152
153 /*------------------------------------------------------------*/
154
155 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
156 A10, A11, /**/ A12,
157 /* ************** */ /* ****************** */
158 &ABL, /**/ &ABR, A20, A21, /**/ A22,
159 FLA_TL );
161 U1,
162 /* ** */ /* ** */
163 &UB, U2, FLA_TOP );
165 V1,
166 /* ** */ /* ** */
167 &VB, V2, FLA_TOP );
169 Y1,
170 /* ** */ /* ** */
171 &YB, Y2, FLA_TOP );
173 Z1,
174 /* ** */ /* ** */
175 &ZB, Z2, FLA_TOP );
176 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
177 FLA_LEFT );
178 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
179 FLA_LEFT );
180 }
181
182 FLA_Obj_free( &U );
183 FLA_Obj_free( &V );
184 FLA_Obj_free( &Y );
185 FLA_Obj_free( &Z );
186
187 return FLA_SUCCESS;
188}
FLA_Error FLA_Bidiag_UT_u_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var4.c:35

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_blk_var5()

FLA_Error FLA_Bidiag_UT_u_blk_var5 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj VT, V0,
22 VB, V1,
23 V2;
24 FLA_Obj YT, Y0,
25 YB, Y1,
26 Y2;
27 FLA_Obj ZT, Z0,
28 ZB, Z1,
29 Z2;
32
33 FLA_Obj U, V, Y, Z;
43 VB_bl;
45 dim_t m_A, n_A;
46 dim_t b_alg, b;
47
49
51 m_A = FLA_Obj_length( A );
52 n_A = FLA_Obj_width( A );
53
54 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
55 FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
56 FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
57 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
58
59 FLA_Part_2x2( A, &ATL, &ATR,
60 &ABL, &ABR, 0, 0, FLA_TL );
61 FLA_Part_2x1( U, &UT,
62 &UB, 0, FLA_TOP );
63 FLA_Part_2x1( V, &VT,
64 &VB, 0, FLA_TOP );
65 FLA_Part_2x1( Y, &YT,
66 &YB, 0, FLA_TOP );
67 FLA_Part_2x1( Z, &ZT,
68 &ZB, 0, FLA_TOP );
69 FLA_Part_1x2( TU, &TUL, &TUR, 0, FLA_LEFT );
70 FLA_Part_1x2( TV, &TVL, &TVR, 0, FLA_LEFT );
71
72 while ( FLA_Obj_min_dim( ABR ) > 0 )
73 {
74 b = min( FLA_Obj_min_dim( ABR ), b_alg );
75
76 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
77 /* ************* */ /* ******************** */
78 &A10, /**/ &A11, &A12,
79 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
80 b, b, FLA_BR );
82 /* ** */ /* ** */
83 &U1,
84 UB, &U2, b, FLA_BOTTOM );
86 /* ** */ /* ** */
87 &V1,
88 VB, &V2, b, FLA_BOTTOM );
90 /* ** */ /* ** */
91 &Y1,
92 YB, &Y2, b, FLA_BOTTOM );
94 /* ** */ /* ** */
95 &Z1,
96 ZB, &Z2, b, FLA_BOTTOM );
97 FLA_Repart_1x2_to_1x3( TUL, /**/ TUR, &TU0, /**/ &TU1, &TU2,
98 b, FLA_RIGHT );
99 FLA_Repart_1x2_to_1x3( TVL, /**/ TVR, &TV0, /**/ &TV1, &TV2,
100 b, FLA_RIGHT );
101
102 /*------------------------------------------------------------*/
103
105 &none2, &none3, b, b, FLA_TL );
106
108 &none2, &none3, b, b, FLA_TL );
109
112 &none, b, FLA_TOP );
113
118
120 &U2_l, b, FLA_TOP );
122 &V2_l, b, FLA_TOP );
124 &Y2_l, b, FLA_TOP );
126 &Z2_l, b, FLA_TOP );
127
128 // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var5( ABR, TU1, TV1, b );
129 //FLA_Bidiag_UT_u_step_unb_var5( ABR, YB, ZB, TU1_tl, TV1_tl );
131
132 if ( FLA_Obj_length( A22 ) > 0 )
133 {
134 // Build UB from ABR, with explicit unit subdiagonal and zeros.
135 FLA_Copy( ABR_l, UB_l );
137
138 // Build VB from ABR, with explicit unit subdiagonal and zeros.
141 &VB_bl, 1, FLA_TOP );
144
145 // A22 = A22 - U2 * Y2' - Z2 * V2';
150 }
151
152 /*------------------------------------------------------------*/
153
154 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
155 A10, A11, /**/ A12,
156 /* ************** */ /* ****************** */
157 &ABL, /**/ &ABR, A20, A21, /**/ A22,
158 FLA_TL );
160 U1,
161 /* ** */ /* ** */
162 &UB, U2, FLA_TOP );
164 V1,
165 /* ** */ /* ** */
166 &VB, V2, FLA_TOP );
168 Y1,
169 /* ** */ /* ** */
170 &YB, Y2, FLA_TOP );
172 Z1,
173 /* ** */ /* ** */
174 &ZB, Z2, FLA_TOP );
175 FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR, TU0, TU1, /**/ TU2,
176 FLA_LEFT );
177 FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR, TV0, TV1, /**/ TV2,
178 FLA_LEFT );
179 }
180
181 FLA_Obj_free( &U );
182 FLA_Obj_free( &V );
183 FLA_Obj_free( &Y );
184 FLA_Obj_free( &Z );
185
186 return FLA_SUCCESS;
187}
FLA_Error FLA_Bidiag_UT_u_step_opt_var5(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var5.c:35

References FLA_Bidiag_UT_u_step_opt_var5(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var2.c:18

References FLA_Bidiag_UT_u_step_ofu_var2(), and i.

◆ FLA_Bidiag_UT_u_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var3.c:18

References FLA_Bidiag_UT_u_step_ofu_var3(), and i.

◆ FLA_Bidiag_UT_u_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_ofu_var4 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16 FLA_Obj Y, Z;
18 dim_t m_A, n_A;
19
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var4.c:35
int FLA_Error
Definition FLA_type_defs.h:47

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), and i.

◆ FLA_Bidiag_UT_u_opt_var1()

FLA_Error FLA_Bidiag_UT_u_opt_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_opt_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var1.c:18

References FLA_Bidiag_UT_u_step_opt_var1(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_opt_var2()

FLA_Error FLA_Bidiag_UT_u_opt_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_opt_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var2.c:18

References FLA_Bidiag_UT_u_step_opt_var2(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_opt_var3()

FLA_Error FLA_Bidiag_UT_u_opt_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_opt_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var3.c:18

References FLA_Bidiag_UT_u_step_opt_var3(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_opt_var4()

FLA_Error FLA_Bidiag_UT_u_opt_var4 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16 FLA_Obj Y, Z;
18 dim_t m_A, n_A;
19
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33}
FLA_Error FLA_Bidiag_UT_u_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var4.c:35

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_opt_var5()

FLA_Error FLA_Bidiag_UT_u_opt_var5 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
14{
16 FLA_Obj Y, Z;
18 dim_t m_A, n_A;
19
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33}
FLA_Error FLA_Bidiag_UT_u_step_opt_var5(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var5.c:35

References FLA_Bidiag_UT_u_step_opt_var5(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_step_ofc_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
520{
524
526 int i;
527
528 // b_alg = FLA_Obj_length( T );
529 int b_alg = m_TS;
530
531 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
532 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
533 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
534 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
535 scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
536 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
537 int inc_v = 1;
538 int inc_y = 1;
539 int inc_z = 1;
540
541 for ( i = 0; i < b_alg; ++i )
542 {
543 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
544 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
545 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
546 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
547 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
548 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
549 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
550
551 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
552 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
553
554 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
555 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
556
557 scomplex* v21 = buff_v + (i+1)*inc_v;
558
559 scomplex* y21 = buff_y + (i+1)*inc_y;
560
561 scomplex* z21 = buff_z + (i+1)*inc_z;
562
563 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
564 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
565
566 scomplex* v21_t = v21 + (0 )*inc_v;
567 scomplex* v21_b = v21 + (1 )*inc_v;
568
569 int m_ahead = m_A - i - 1;
570 int n_ahead = n_A - i - 1;
571 int m_behind = i;
572 int n_behind = i;
573
574 /*------------------------------------------------------------*/
575
576 // FLA_Househ2_UT( FLA_LEFT,
577 // alpha11,
578 // a21, tau11 );
580 alpha11,
581 a21, rs_A,
582 tau11 );
583
584 if ( n_ahead > 0 )
585 {
586 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
587 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
589 n_ahead,
590 a12t, cs_A,
591 y21, inc_y );
594 m_ahead,
595 n_ahead,
596 buff_1,
597 A22, rs_A, cs_A,
598 a21, rs_A,
599 buff_1,
600 y21, inc_y );
601
602 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
604 n_ahead,
605 tau11,
606 y21, inc_y );
607
608 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
610 n_ahead,
611 buff_m1,
612 y21, inc_y,
613 a12t, cs_A );
614
615 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
617 a12t_l,
618 a12t_r, cs_A,
619 sigma11 );
620
621 // FLA_Set( FLA_ONE, v21_t );
622 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
623 *v21_t = *buff_1;
625 n_ahead - 1,
626 a12t_r, cs_A,
627 v21_b, inc_y );
628
629 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
630 // FLA_Scal( FLA_MINUS_ONE, beta );
632 n_ahead,
633 y21, inc_y,
634 v21, inc_v,
635 &beta );
636 bl1_cneg1( &beta );
637
638 // FLA_Copy( a21, z21 );
639 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
640 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
642 m_ahead,
643 a21, rs_A,
644 z21, inc_z );
647 m_ahead,
648 n_ahead,
649 buff_1,
650 A22, rs_A, cs_A,
651 v21, inc_v,
652 &beta,
653 z21, inc_z );
655 m_ahead,
656 sigma11,
657 z21, inc_z );
658
659 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
660 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
662 n_ahead,
663 buff_m1,
664 a21, rs_A,
665 y21, inc_y,
666 z21, inc_z,
667 v21, inc_v,
668 A22, rs_A, cs_A );
669
670 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
673 m_behind,
674 n_ahead,
675 buff_1,
676 A02, rs_A, cs_A,
677 v21, inc_v,
678 buff_0,
679 s01, rs_S );
680 }
681
682 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
683 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
685 n_behind,
686 a10t, cs_A,
687 t01, rs_T );
690 m_ahead,
691 n_behind,
692 buff_1,
693 A20, rs_A, cs_A,
694 a21, rs_A,
695 buff_1,
696 t01, rs_T );
697
698 /*------------------------------------------------------------*/
699
700 }
701
702 // FLA_Obj_free( &v );
703 // FLA_Obj_free( &y );
704 // FLA_Obj_free( &z );
705 FLA_free( buff_v );
706 FLA_free( buff_y );
707 FLA_free( buff_z );
708
709 return FLA_SUCCESS;
710}
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:241
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:677
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofc_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
927{
931
941 int i;
942
943 // b_alg = FLA_Obj_length( T );
944 int b_alg = m_TS;
945
946 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
947 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
948 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
949 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
950 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
951 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
952 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
953 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
954 scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
955 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
956 scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
957 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
958 scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
959 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
960 int inc_w = 1;
961 int inc_ap = 1;
962 int inc_u = 1;
963 int inc_up = 1;
964 int inc_v = 1;
965 int inc_y = 1;
966 int inc_z = 1;
967
968 for ( i = 0; i < b_alg; ++i )
969 {
970 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
971 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
972 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
973 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
974 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
975 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
976 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
977
978 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
979 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
980
981 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
982 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
983
984 scomplex* w21 = buff_w + (i+1)*inc_w;
985
986 scomplex* a12p = buff_ap + (i+1)*inc_ap;
987
989 scomplex* u21 = buff_u + (i+1)*inc_u;
990
991 scomplex* u21p = buff_up + (i+1)*inc_up;
992
993 scomplex* nu11 = buff_v + (i )*inc_v;
994 scomplex* v21 = buff_v + (i+1)*inc_v;
995
996 scomplex* psi11 = buff_y + (i )*inc_y;
997 scomplex* y21 = buff_y + (i+1)*inc_y;
998
999 scomplex* zeta11 = buff_z + (i )*inc_z;
1000 scomplex* z21 = buff_z + (i+1)*inc_z;
1001
1002 scomplex* a12p_t = a12p + (0 )*inc_ap;
1003 scomplex* a12p_b = a12p + (1 )*inc_ap;
1004
1005 scomplex* v21_t = v21 + (0 )*inc_v;
1006 scomplex* v21_b = v21 + (1 )*inc_v;
1007
1008 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1009 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1010
1011 scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1012
1013 int m_ahead = m_A - i - 1;
1014 int n_ahead = n_A - i - 1;
1015 int m_behind = i;
1016 int n_behind = i;
1017
1018 /*------------------------------------------------------------*/
1019
1020 if ( m_behind > 0 )
1021 {
1022 // FLA_Copy( upsilon11, minus_upsilon11 );
1023 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1025
1026 // FLA_Copy( zeta11, minus_zeta11 );
1027 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1029
1030 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1031 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1034
1035 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1036 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1039
1040 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1041 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1043 1,
1045 upsilon11, 1,
1046 alpha11, 1 );
1048 1,
1050 zeta11, 1,
1051 alpha11, 1 );
1052
1053 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1054 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1056 m_ahead,
1058 u21, inc_u,
1059 a21, rs_A );
1061 m_ahead,
1063 z21, inc_z,
1064 a21, rs_A );
1065
1066 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1067 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1069 n_ahead,
1071 y21, inc_y,
1072 a12t, cs_A );
1074 n_ahead,
1075 &minus_zeta11,
1076 v21, inc_v,
1077 a12t, cs_A );
1078 }
1079
1080 // FLA_Househ2_UT( FLA_LEFT,
1081 // alpha11,
1082 // a21, tau11 );
1083 // FLA_Copy( a21, u21p );
1085 alpha11,
1086 a21, rs_A,
1087 tau11 );
1089 m_ahead,
1090 a21, rs_A,
1091 u21p, inc_up );
1092
1093 if ( n_ahead > 0 )
1094 {
1095 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1096 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1098
1099 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1100 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1102 n_ahead,
1103 a12t, cs_A,
1104 a12p, inc_ap );
1106 n_ahead,
1108 a12t, cs_A,
1109 a12p, inc_ap );
1110 }
1111
1112 if ( m_behind > 0 && n_ahead > 0 )
1113 {
1114 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1115 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1116 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1117 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1118 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1120 n_ahead,
1121 tau11,
1122 buff_m1,
1123 u21, inc_u,
1124 y21, inc_y,
1125 z21, inc_z,
1126 v21, inc_v,
1127 A22, rs_A, cs_A,
1128 u21p, inc_up,
1129 a12p, inc_ap,
1130 w21, inc_w );
1131
1132
1133 }
1134 else if ( n_ahead > 0 )
1135 {
1136 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1137 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1138 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1140 n_ahead,
1141 tau11,
1142 buff_0,
1143 A22, rs_A, cs_A,
1144 u21p, inc_up,
1145 a12p, inc_ap,
1146 y21, inc_y,
1147 w21, inc_w );
1148 }
1149
1150 if ( n_ahead > 0 )
1151 {
1152 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1154 n_ahead,
1155 buff_1,
1156 a12t, cs_A,
1157 y21, inc_y );
1158
1159 // FLA_Househ2s_UT( FLA_RIGHT,
1160 // a12p_t,
1161 // a12p_b,
1162 // alpha12, psi11_minus_alpha12, sigma11 );
1164 a12p_t,
1165 a12p_b, inc_ap,
1166 &alpha12,
1168 sigma11 );
1169
1170 // FLA_Copy( a12p, v21 );
1171 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1172 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1173 // FLA_Conjugate( v21_b );
1175 n_ahead,
1176 a12p, inc_ap,
1177 v21, inc_v );
1180 n_ahead,
1182 v21, inc_v );
1183 bl1_cconjv( n_ahead - 1,
1184 v21_b, inc_v );
1185
1186 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1187 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1188 *a12t_l = alpha12;
1190 n_ahead - 1,
1191 v21_b, inc_v,
1192 a12t_r, cs_A );
1193 }
1194
1195 // FLA_Copy( u21p, u21 );
1197 m_ahead,
1198 u21p, inc_up,
1199 u21, inc_u );
1200
1201 if ( n_ahead > 0 )
1202 {
1203 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1204 // FLA_Scal( FLA_MINUS_ONE, beta );
1205 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1207 n_ahead,
1208 y21, inc_y,
1209 v21, inc_v,
1210 &beta );
1212
1213 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1214 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1217
1218 // FLA_Copy( w21, z21 );
1219 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1220 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1221 // FLA_Axpy( beta, u21, z21 );
1223 m_ahead,
1224 w21, inc_w,
1225 z21, inc_z );
1227 m_ahead,
1229 A22_l, rs_A,
1230 z21, inc_z );
1232 m_ahead,
1234 z21, inc_z );
1236 m_ahead,
1237 &beta,
1238 u21, inc_u,
1239 z21, inc_z );
1240
1241 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1242 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1244 n_ahead,
1245 tau11,
1246 y21, inc_y );
1248 m_ahead,
1249 sigma11,
1250 z21, inc_z );
1251
1252 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1255 m_behind,
1256 n_ahead,
1257 buff_1,
1258 A02, rs_A, cs_A,
1259 v21, inc_v,
1260 buff_0,
1261 s01, rs_S );
1262 }
1263
1264 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1265 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1267 n_behind,
1268 a10t, cs_A,
1269 t01, rs_T );
1272 m_ahead,
1273 n_behind,
1274 buff_1,
1275 A20, rs_A, cs_A,
1276 u21, inc_u,
1277 buff_1,
1278 t01, rs_T );
1279
1280 if ( m_behind + 1 == b_alg && n_ahead > 0 )
1281 {
1282 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1283 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1286 m_ahead,
1287 n_ahead,
1288 buff_m1,
1289 u21, inc_u,
1290 y21, inc_y,
1291 A22, rs_A, cs_A );
1294 m_ahead,
1295 n_ahead,
1296 buff_m1,
1297 z21, inc_z,
1298 v21, inc_v,
1299 A22, rs_A, cs_A );
1300 }
1301
1302 /*------------------------------------------------------------*/
1303
1304 }
1305
1306 // FLA_Obj_free( &w );
1307 // FLA_Obj_free( &ap );
1308 // FLA_Obj_free( &u );
1309 // FLA_Obj_free( &up );
1310 // FLA_Obj_free( &v );
1311 // FLA_Obj_free( &y );
1312 // FLA_Obj_free( &z );
1313 FLA_free( buff_w );
1314 FLA_free( buff_ap );
1315 FLA_free( buff_u );
1316 FLA_free( buff_up );
1317 FLA_free( buff_v );
1318 FLA_free( buff_y );
1319 FLA_free( buff_z );
1320
1321 return FLA_SUCCESS;
1322}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:424
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition FLA_Househ2s_UT.c:589
void bl1_cconjv(int m, scomplex *x, int incx)
Definition bl1_conjv.c:23
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofc_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
1103{
1107
1112 scomplex beta;
1114 int i;
1115
1116 // b_alg = FLA_Obj_length( T );
1117 int b_alg = m_TS;
1118
1119 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1121 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1122 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1123 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1124 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1125 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1126 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1127 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1128 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1129 scomplex* buff_tmp = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1130 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1131 scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1132 scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1133 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1134 scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1135 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1136 scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1137 scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1138 int inc_tmp = 1;
1139 int inc_w = 1;
1140 int inc_al = 1;
1141 int inc_ap = 1;
1142 int inc_u = 1;
1143 int inc_up = 1;
1144 int inc_v = 1;
1145 int inc_d = 1;
1146 int inc_e = 1;
1147
1148 // FLA_Set( FLA_ZERO, Y );
1149 // FLA_Set( FLA_ZERO, Z );
1150 bl1_csetm( n_A,
1151 b_alg,
1152 buff_0,
1153 buff_Y, rs_Y, cs_Y );
1154 bl1_csetm( m_A,
1155 b_alg,
1156 buff_0,
1157 buff_Z, rs_Z, cs_Z );
1158
1159 for ( i = 0; i < b_alg; ++i )
1160 {
1161 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1162 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1163 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1164 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1165 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1166 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1167 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1168 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1169
1170 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1171 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1172 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1173
1174 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1175 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1176 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1177
1178 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1179 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1180
1181 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1182 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1183
1184 scomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1185
1186 scomplex* w21 = buff_w + (i+1)*inc_w;
1187
1188 scomplex* a22l = buff_al + (i+1)*inc_al;
1189
1190 scomplex* a12p = buff_ap + (i+1)*inc_ap;
1191
1192 scomplex* u21 = buff_u + (i+1)*inc_u;
1193
1194 scomplex* u21p = buff_up + (i+1)*inc_up;
1195
1196 scomplex* v21 = buff_v + (i+1)*inc_v;
1197
1198 scomplex* d0 = buff_d + (0 )*inc_d;
1199
1200 scomplex* e0 = buff_e + (0 )*inc_e;
1201
1202 scomplex* a12p_t = a12p + (0 )*inc_ap;
1203 scomplex* a12p_b = a12p + (1 )*inc_ap;
1204
1205 scomplex* v21_t = v21 + (0 )*inc_v;
1206 scomplex* v21_b = v21 + (1 )*inc_v;
1207
1208 scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1209
1210 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1211 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1212
1213 scomplex* ABL = a10t;
1214 scomplex* ZBL = z10t;
1215
1216 scomplex* a2 = alpha11;
1217
1218 int m_ahead = m_A - i - 1;
1219 int n_ahead = n_A - i - 1;
1220 int m_behind = i;
1221 int n_behind = i;
1222
1223 /*------------------------------------------------------------*/
1224
1225 if ( m_behind > 0 )
1226 {
1227 // FLA_Copy( a01_b, last_elem );
1228 // FLA_Set( FLA_ONE, a01_b );
1229 last_elem = *a01_b;
1230 *a01_b = *buff_1;
1231 }
1232
1233 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1234 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1237 m_ahead + 1,
1238 n_behind,
1239 buff_m1,
1240 ABL, rs_A, cs_A,
1241 y10t, cs_Y,
1242 buff_1,
1243 a2, rs_A );
1246 m_ahead + 1,
1247 n_behind,
1248 buff_m1,
1249 ZBL, rs_Z, cs_Z,
1250 a01, rs_A,
1251 buff_1,
1252 a2, rs_A );
1253
1254 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1255 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1258 n_ahead,
1259 n_behind,
1260 buff_m1,
1261 Y20, rs_Y, cs_Y,
1262 a10t, cs_A,
1263 buff_1,
1264 a12t, cs_A );
1267 m_behind,
1268 n_ahead,
1269 buff_m1,
1270 A02, rs_A, cs_A,
1271 z10t, cs_Z,
1272 buff_1,
1273 a12t, cs_A );
1274
1275 if ( m_behind > 0 )
1276 {
1277 // FLA_Copy( last_elem, a01_b );
1278 *a01_b = last_elem;
1279 }
1280
1281 // FLA_Househ2_UT( FLA_LEFT,
1282 // alpha11,
1283 // a21, tau11 );
1284 // FLA_Copy( a21, u21p );
1286 alpha11,
1287 a21, rs_A,
1288 tau11 );
1290 m_ahead,
1291 a21, rs_A,
1292 u21p, inc_up );
1293
1294 if ( n_ahead > 0 )
1295 {
1296 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1297 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1299
1300 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1301 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1303 n_ahead,
1304 a12t, cs_A,
1305 a12p, inc_ap );
1307 n_ahead,
1309 a12t, cs_A,
1310 a12p, inc_ap );
1311
1312 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1313 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1316 m_ahead,
1317 n_behind,
1318 buff_1,
1319 A20, rs_A, cs_A,
1320 u21p, inc_up,
1321 buff_0,
1322 d0, inc_d );
1325 m_ahead,
1326 n_behind,
1327 buff_1,
1328 Z20, rs_Z, cs_Z,
1329 u21p, inc_up,
1330 buff_0,
1331 e0, inc_e );
1332
1333 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1334 // FLA_Axpy( FLA_ONE, d0, t01 );
1336 n_behind,
1337 a10t, cs_A,
1338 t01, rs_T );
1340 n_behind,
1341 buff_1,
1342 d0, inc_d,
1343 t01, rs_T );
1344
1345 // FLA_Set( FLA_ZERO, y21 );
1346 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1347 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1349 buff_0,
1350 y21, rs_Y );
1353 n_ahead,
1354 n_behind,
1355 buff_m1,
1356 Y20, rs_Y, cs_Y,
1357 d0, inc_d,
1358 buff_1,
1359 y21, rs_Y );
1362 m_behind,
1363 n_ahead,
1364 buff_m1,
1365 A02, rs_A, cs_A,
1366 e0, inc_e,
1367 buff_1,
1368 y21, rs_Y );
1369
1370 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1371 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1372 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1374 n_ahead,
1375 tau11,
1376 buff_1,
1377 A22, rs_A, cs_A,
1378 u21p, inc_up,
1379 a12p, inc_ap,
1380 y21, rs_Y,
1381 w21, inc_w );
1382
1383 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1384 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1385 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1386 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1387 // FLA_Copy( A22_l, a22l );
1388 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1389 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1390 // FLA_Copy( g0, s01 );
1392 n_behind,
1393 m_behind,
1394 n_ahead,
1395 buff_m1,
1396 A20, rs_A, cs_A,
1397 Y20, rs_Y, cs_Y,
1398 Z20, rs_Z, cs_Z,
1399 A02, rs_A, cs_A,
1400 A22, rs_A, cs_A,
1401 tmp21, inc_tmp,
1402 s01, rs_S,
1403 a12p, inc_ap,
1404 w21, inc_w,
1405 a22l, inc_al );
1406
1407 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1409 n_ahead,
1410 buff_1,
1411 a12t, cs_A,
1412 y21, rs_Y );
1413
1414 // FLA_Househ2s_UT( FLA_RIGHT,
1415 // a12p_t,
1416 // a12p_b,
1417 // alpha12, psi11_minus_alpha12, sigma11 );
1419 a12p_t,
1420 a12p_b, inc_ap,
1421 &alpha12,
1423 sigma11 );
1424
1425 // FLA_Copy( a12p, v21 );
1426 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1427 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1428 // FLA_Conjugate( v21_b );
1430 n_ahead,
1431 a12p, inc_ap,
1432 v21, inc_v );
1435 n_ahead,
1437 v21, inc_v );
1438 bl1_cconjv( n_ahead - 1,
1439 v21_b, inc_v );
1440
1441 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1442 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1445
1446 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1447 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1449 n_behind,
1451 A02, rs_A,
1452 s01, rs_S );
1454 n_behind,
1456 s01, rs_S );
1457
1458 // FLA_Copy( alpha12, a12t_l );
1459 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1460 *a12t_l = alpha12;
1462 n_ahead - 1,
1463 v21_b, inc_v,
1464 a12t_r, cs_A );
1465 }
1466
1467 // FLA_Copy( u21p, u21 );
1469 m_ahead,
1470 u21p, inc_up,
1471 u21, inc_u );
1472
1473 if ( n_ahead > 0 )
1474 {
1475 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1476 // FLA_Scal( FLA_MINUS_ONE, beta );
1477 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1479 n_ahead,
1480 y21, rs_Y,
1481 v21, inc_v,
1482 &beta );
1484
1485 // FLA_Copy( w21, z21 );
1486 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1487 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1488 // FLA_Axpy( beta, u21, z21 );
1490 m_ahead,
1491 w21, inc_w,
1492 z21, rs_Z );
1494 m_ahead,
1496 a22l, inc_al,
1497 z21, rs_Z );
1499 m_ahead,
1501 z21, rs_Z );
1503 m_ahead,
1504 &beta,
1505 u21, inc_u,
1506 z21, rs_Z );
1507
1508 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1509 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1511 n_ahead,
1512 tau11,
1513 y21, rs_Y );
1515 m_ahead,
1516 sigma11,
1517 z21, rs_Z );
1518 }
1519 else // if ( n_ahead == 0 )
1520 {
1521 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1522 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1524 n_behind,
1525 a10t, cs_A,
1526 t01, rs_T );
1529 m_ahead,
1530 n_behind,
1531 buff_1,
1532 A20, rs_A, cs_A,
1533 u21, inc_u,
1534 buff_1,
1535 t01, rs_T );
1536 }
1537
1538 /*------------------------------------------------------------*/
1539
1540 }
1541
1542 // FLA_Obj_free( &w );
1543 // FLA_Obj_free( &al );
1544 // FLA_Obj_free( &ap );
1545 // FLA_Obj_free( &u );
1546 // FLA_Obj_free( &up );
1547 // FLA_Obj_free( &v );
1548 // FLA_Obj_free( &d );
1549 // FLA_Obj_free( &e );
1550 FLA_free( buff_tmp );
1551 FLA_free( buff_w );
1552 FLA_free( buff_al );
1553 FLA_free( buff_ap );
1554 FLA_free( buff_u );
1555 FLA_free( buff_up );
1556 FLA_free( buff_v );
1557 FLA_free( buff_d );
1558 FLA_free( buff_e );
1559
1560 return FLA_SUCCESS;
1561}
FLA_Error FLA_Fused_UYx_ZVx_opc_var1(int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:424
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofd_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
320{
321 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
322 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
324
325 double beta;
326 int i;
327
328 // b_alg = FLA_Obj_length( T );
329 int b_alg = m_TS;
330
331 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
332 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
333 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
334 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
335 double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
336 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
337 int inc_v = 1;
338 int inc_y = 1;
339 int inc_z = 1;
340
341 for ( i = 0; i < b_alg; ++i )
342 {
343 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
344 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
345 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
346 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350
351 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353
354 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
355 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
356
357 double* v21 = buff_v + (i+1)*inc_v;
358
359 double* y21 = buff_y + (i+1)*inc_y;
360
361 double* z21 = buff_z + (i+1)*inc_z;
362
363 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
364 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
365
366 double* v21_t = v21 + (0 )*inc_v;
367 double* v21_b = v21 + (1 )*inc_v;
368
369 int m_ahead = m_A - i - 1;
370 int n_ahead = n_A - i - 1;
371 int m_behind = i;
372 int n_behind = i;
373
374 /*------------------------------------------------------------*/
375
376 // FLA_Househ2_UT( FLA_LEFT,
377 // alpha11,
378 // a21, tau11 );
380 alpha11,
381 a21, rs_A,
382 tau11 );
383
384 if ( n_ahead > 0 )
385 {
386 // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
387 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
389 n_ahead,
390 a12t, cs_A,
391 y21, inc_y );
394 m_ahead,
395 n_ahead,
396 buff_1,
397 A22, rs_A, cs_A,
398 a21, rs_A,
399 buff_1,
400 y21, inc_y );
401
402 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
404 n_ahead,
405 tau11,
406 y21, inc_y );
407
408 // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
410 n_ahead,
411 buff_m1,
412 y21, inc_y,
413 a12t, cs_A );
414
415 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
417 a12t_l,
418 a12t_r, cs_A,
419 sigma11 );
420
421 // FLA_Set( FLA_ONE, v21_t );
422 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
423 *v21_t = *buff_1;
425 n_ahead - 1,
426 a12t_r, cs_A,
427 v21_b, inc_y );
428
429 // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
430 // FLA_Scal( FLA_MINUS_ONE, beta );
432 n_ahead,
433 v21, inc_v,
434 y21, inc_y,
435 &beta );
436 bl1_dneg1( &beta );
437
438 // FLA_Copy( a21, z21 );
439 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
440 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
442 m_ahead,
443 a21, rs_A,
444 z21, inc_z );
447 m_ahead,
448 n_ahead,
449 buff_1,
450 A22, rs_A, cs_A,
451 v21, inc_v,
452 &beta,
453 z21, inc_z );
455 m_ahead,
456 sigma11,
457 z21, inc_z );
458
459 // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
460 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
462 n_ahead,
463 buff_m1,
464 a21, rs_A,
465 y21, inc_y,
466 z21, inc_z,
467 v21, inc_v,
468 A22, rs_A, cs_A );
469
470 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
473 m_behind,
474 n_ahead,
475 buff_1,
476 A02, rs_A, cs_A,
477 v21, inc_v,
478 buff_0,
479 s01, rs_S );
480 }
481
482 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
483 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
485 n_behind,
486 a10t, cs_A,
487 t01, rs_T );
490 m_ahead,
491 n_behind,
492 buff_1,
493 A20, rs_A, cs_A,
494 a21, rs_A,
495 buff_1,
496 t01, rs_T );
497
498 /*------------------------------------------------------------*/
499
500 }
501
502 // FLA_Obj_free( &v );
503 // FLA_Obj_free( &y );
504 // FLA_Obj_free( &z );
505 FLA_free( buff_v );
506 FLA_free( buff_y );
507 FLA_free( buff_z );
508
509 return FLA_SUCCESS;
510}
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:664
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofd_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
522{
523 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
524 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
526
527 double alpha12;
528 double minus_conj_alpha12;
529 double psi11_minus_alpha12;
530 double minus_inv_tau11;
531 double minus_upsilon11;
532 double minus_conj_nu11;
533 double minus_conj_psi11;
534 double minus_zeta11;
535 double beta;
536 int i;
537
538 // b_alg = FLA_Obj_length( T );
539 int b_alg = m_TS;
540
541 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
542 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
543 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
544 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
545 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
546 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
547 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
548 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
549 double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
550 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
551 double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
552 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
553 double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
554 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
555 int inc_w = 1;
556 int inc_ap = 1;
557 int inc_u = 1;
558 int inc_up = 1;
559 int inc_v = 1;
560 int inc_y = 1;
561 int inc_z = 1;
562
563 for ( i = 0; i < b_alg; ++i )
564 {
565 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
566 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
567 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
568 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
569 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
570 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
571 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
572
573 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
574 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
575
576 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
577 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
578
579 double* w21 = buff_w + (i+1)*inc_w;
580
581 double* a12p = buff_ap + (i+1)*inc_ap;
582
583 double* upsilon11 = buff_u + (i )*inc_u;
584 double* u21 = buff_u + (i+1)*inc_u;
585
586 double* u21p = buff_up + (i+1)*inc_up;
587
588 double* nu11 = buff_v + (i )*inc_v;
589 double* v21 = buff_v + (i+1)*inc_v;
590
591 double* psi11 = buff_y + (i )*inc_y;
592 double* y21 = buff_y + (i+1)*inc_y;
593
594 double* zeta11 = buff_z + (i )*inc_z;
595 double* z21 = buff_z + (i+1)*inc_z;
596
597 double* a12p_t = a12p + (0 )*inc_ap;
598 double* a12p_b = a12p + (1 )*inc_ap;
599
600 double* v21_t = v21 + (0 )*inc_v;
601 double* v21_b = v21 + (1 )*inc_v;
602
603 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
604 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
605
606 double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
607
608 int m_ahead = m_A - i - 1;
609 int n_ahead = n_A - i - 1;
610 int m_behind = i;
611 int n_behind = i;
612
613 /*------------------------------------------------------------*/
614
615 if ( m_behind > 0 )
616 {
617 // FLA_Copy( upsilon11, minus_upsilon11 );
618 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
620
621 // FLA_Copy( zeta11, minus_zeta11 );
622 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
624
625 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
626 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
629
630 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
631 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
634
635 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
636 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
638 1,
640 upsilon11, 1,
641 alpha11, 1 );
643 1,
645 zeta11, 1,
646 alpha11, 1 );
647
648 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
649 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
651 m_ahead,
653 u21, inc_u,
654 a21, rs_A );
656 m_ahead,
658 z21, inc_z,
659 a21, rs_A );
660
661 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
662 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
664 n_ahead,
666 y21, inc_y,
667 a12t, cs_A );
669 n_ahead,
671 v21, inc_v,
672 a12t, cs_A );
673 }
674
675 // FLA_Househ2_UT( FLA_LEFT,
676 // alpha11,
677 // a21, tau11 );
678 // FLA_Copy( a21, u21p );
680 alpha11,
681 a21, rs_A,
682 tau11 );
684 m_ahead,
685 a21, rs_A,
686 u21p, inc_up );
687
688 if ( n_ahead > 0 )
689 {
690 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
691 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
693
694 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
695 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
697 n_ahead,
698 a12t, cs_A,
699 a12p, inc_ap );
701 n_ahead,
703 a12t, cs_A,
704 a12p, inc_ap );
705 }
706
707 if ( m_behind > 0 && n_ahead > 0 )
708 {
709 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
710 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
711 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
712 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
713 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
715 n_ahead,
716 tau11,
717 buff_m1,
718 u21, inc_u,
719 y21, inc_y,
720 z21, inc_z,
721 v21, inc_v,
722 A22, rs_A, cs_A,
723 u21p, inc_up,
724 a12p, inc_ap,
725 w21, inc_w );
726
727
728 }
729 else if ( n_ahead > 0 )
730 {
731 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
732 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
733 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
735 n_ahead,
736 tau11,
737 buff_0,
738 A22, rs_A, cs_A,
739 u21p, inc_up,
740 a12p, inc_ap,
741 y21, inc_y,
742 w21, inc_w );
743 }
744
745 if ( n_ahead > 0 )
746 {
747 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
749 n_ahead,
750 buff_1,
751 a12t, cs_A,
752 y21, inc_y );
753
754 // FLA_Househ2s_UT( FLA_RIGHT,
755 // a12p_t,
756 // a12p_b,
757 // alpha12, psi11_minus_alpha12, sigma11 );
759 a12p_t,
760 a12p_b, inc_ap,
761 &alpha12,
763 sigma11 );
764
765 // FLA_Copy( a12p, v21 );
766 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
767 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
768 // FLA_Conjugate( v21_b );
770 n_ahead,
771 a12p, inc_ap,
772 v21, inc_v );
775 n_ahead,
777 v21, inc_v );
778 bl1_dconjv( n_ahead - 1,
779 v21_b, inc_v );
780
781 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
782 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
783 *a12t_l = alpha12;
785 n_ahead - 1,
786 v21_b, inc_v,
787 a12t_r, cs_A );
788 }
789
790 // FLA_Copy( u21p, u21 );
792 m_ahead,
793 u21p, inc_up,
794 u21, inc_u );
795
796 if ( n_ahead > 0 )
797 {
798 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
799 // FLA_Scal( FLA_MINUS_ONE, beta );
800 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
802 n_ahead,
803 y21, inc_y,
804 v21, inc_v,
805 &beta );
807
808 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
809 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
812
813 // FLA_Copy( w21, z21 );
814 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
815 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
816 // FLA_Axpy( beta, u21, z21 );
818 m_ahead,
819 w21, inc_w,
820 z21, inc_z );
822 m_ahead,
824 A22_l, rs_A,
825 z21, inc_z );
827 m_ahead,
829 z21, inc_z );
831 m_ahead,
832 &beta,
833 u21, inc_u,
834 z21, inc_z );
835
836 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
837 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
839 n_ahead,
840 tau11,
841 y21, inc_y );
843 m_ahead,
844 sigma11,
845 z21, inc_z );
846
847 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
850 m_behind,
851 n_ahead,
852 buff_1,
853 A02, rs_A, cs_A,
854 v21, inc_v,
855 buff_0,
856 s01, rs_S );
857 }
858
859 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
860 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
862 n_behind,
863 a10t, cs_A,
864 t01, rs_T );
867 m_ahead,
868 n_behind,
869 buff_1,
870 A20, rs_A, cs_A,
871 u21, inc_u,
872 buff_1,
873 t01, rs_T );
874
875 if ( m_behind + 1 == b_alg && n_ahead > 0 )
876 {
877 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
878 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
881 m_ahead,
882 n_ahead,
883 buff_m1,
884 u21, inc_u,
885 y21, inc_y,
886 A22, rs_A, cs_A );
889 m_ahead,
890 n_ahead,
891 buff_m1,
892 z21, inc_z,
893 v21, inc_v,
894 A22, rs_A, cs_A );
895 }
896
897 /*------------------------------------------------------------*/
898
899 }
900
901 // FLA_Obj_free( &w );
902 // FLA_Obj_free( &ap );
903 // FLA_Obj_free( &u );
904 // FLA_Obj_free( &up );
905 // FLA_Obj_free( &v );
906 // FLA_Obj_free( &y );
907 // FLA_Obj_free( &z );
908 FLA_free( buff_w );
909 FLA_free( buff_ap );
910 FLA_free( buff_u );
911 FLA_free( buff_up );
912 FLA_free( buff_v );
913 FLA_free( buff_y );
914 FLA_free( buff_z );
915
916 return FLA_SUCCESS;
917}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:267
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition FLA_Househ2s_UT.c:572
void bl1_dconjv(int m, double *x, int incx)
Definition bl1_conjv.c:18
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofd_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
633{
634 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
635 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
637
638 double alpha12;
639 double minus_conj_alpha12;
640 double psi11_minus_alpha12;
641 double minus_inv_tau11;
642 double beta;
643 double last_elem;
644 int i;
645
646 // b_alg = FLA_Obj_length( T );
647 int b_alg = m_TS;
648
649 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
650 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
651 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
652 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
653 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
654 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
655 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
656 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
657 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
658 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
659 double* buff_tmp = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
660 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
661 double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
662 double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
663 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
664 double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
665 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
666 double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
667 double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
668 int inc_tmp = 1;
669 int inc_w = 1;
670 int inc_al = 1;
671 int inc_ap = 1;
672 int inc_u = 1;
673 int inc_up = 1;
674 int inc_v = 1;
675 int inc_d = 1;
676 int inc_e = 1;
677
678 // FLA_Set( FLA_ZERO, Y );
679 // FLA_Set( FLA_ZERO, Z );
680 bl1_dsetm( n_A,
681 b_alg,
682 buff_0,
683 buff_Y, rs_Y, cs_Y );
684 bl1_dsetm( m_A,
685 b_alg,
686 buff_0,
687 buff_Z, rs_Z, cs_Z );
688
689 for ( i = 0; i < b_alg; ++i )
690 {
691 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
692 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
693 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
694 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
695 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
696 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
697 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
698 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
699
700 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
701 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
702 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
703
704 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
705 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
706 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
707
708 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
709 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
710
711 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
712 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
713
714 double* tmp21 = buff_tmp + (i+1)*inc_tmp;
715
716 double* w21 = buff_w + (i+1)*inc_w;
717
718 double* a22l = buff_al + (i+1)*inc_al;
719
720 double* a12p = buff_ap + (i+1)*inc_ap;
721
722 double* u21 = buff_u + (i+1)*inc_u;
723
724 double* u21p = buff_up + (i+1)*inc_up;
725
726 double* v21 = buff_v + (i+1)*inc_v;
727
728 double* d0 = buff_d + (0 )*inc_d;
729
730 double* e0 = buff_e + (0 )*inc_e;
731
732 double* a12p_t = a12p + (0 )*inc_ap;
733 double* a12p_b = a12p + (1 )*inc_ap;
734
735 double* v21_t = v21 + (0 )*inc_v;
736 double* v21_b = v21 + (1 )*inc_v;
737
738 double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
739
740 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
741 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
742
743 double* ABL = a10t;
744 double* ZBL = z10t;
745
746 double* a2 = alpha11;
747
748 int m_ahead = m_A - i - 1;
749 int n_ahead = n_A - i - 1;
750 int m_behind = i;
751 int n_behind = i;
752
753 /*------------------------------------------------------------*/
754
755 if ( m_behind > 0 )
756 {
757 // FLA_Copy( a01_b, last_elem );
758 // FLA_Set( FLA_ONE, a01_b );
759 last_elem = *a01_b;
760 *a01_b = *buff_1;
761 }
762
763 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
764 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
767 m_ahead + 1,
768 n_behind,
769 buff_m1,
770 ABL, rs_A, cs_A,
771 y10t, cs_Y,
772 buff_1,
773 a2, rs_A );
776 m_ahead + 1,
777 n_behind,
778 buff_m1,
779 ZBL, rs_Z, cs_Z,
780 a01, rs_A,
781 buff_1,
782 a2, rs_A );
783
784 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
785 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
788 n_ahead,
789 n_behind,
790 buff_m1,
791 Y20, rs_Y, cs_Y,
792 a10t, cs_A,
793 buff_1,
794 a12t, cs_A );
797 m_behind,
798 n_ahead,
799 buff_m1,
800 A02, rs_A, cs_A,
801 z10t, cs_Z,
802 buff_1,
803 a12t, cs_A );
804
805 if ( m_behind > 0 )
806 {
807 // FLA_Copy( last_elem, a01_b );
808 *a01_b = last_elem;
809 }
810
811 // FLA_Househ2_UT( FLA_LEFT,
812 // alpha11,
813 // a21, tau11 );
814 // FLA_Copy( a21, u21p );
816 alpha11,
817 a21, rs_A,
818 tau11 );
820 m_ahead,
821 a21, rs_A,
822 u21p, inc_up );
823
824 if ( n_ahead > 0 )
825 {
826 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
827 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
829
830 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
831 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
833 n_ahead,
834 a12t, cs_A,
835 a12p, inc_ap );
837 n_ahead,
839 a12t, cs_A,
840 a12p, inc_ap );
841
842 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
843 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
846 m_ahead,
847 n_behind,
848 buff_1,
849 A20, rs_A, cs_A,
850 u21p, inc_up,
851 buff_0,
852 d0, inc_d );
855 m_ahead,
856 n_behind,
857 buff_1,
858 Z20, rs_Z, cs_Z,
859 u21p, inc_up,
860 buff_0,
861 e0, inc_e );
862
863 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
864 // FLA_Axpy( FLA_ONE, d0, t01 );
866 n_behind,
867 a10t, cs_A,
868 t01, rs_T );
870 n_behind,
871 buff_1,
872 d0, inc_d,
873 t01, rs_T );
874
875 // FLA_Set( FLA_ZERO, y21 );
876 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
877 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
879 buff_0,
880 y21, rs_Y );
883 n_ahead,
884 n_behind,
885 buff_m1,
886 Y20, rs_Y, cs_Y,
887 d0, inc_d,
888 buff_1,
889 y21, rs_Y );
892 m_behind,
893 n_ahead,
894 buff_m1,
895 A02, rs_A, cs_A,
896 e0, inc_e,
897 buff_1,
898 y21, rs_Y );
899
900 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
901 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
902 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
904 n_ahead,
905 tau11,
906 buff_1,
907 A22, rs_A, cs_A,
908 u21p, inc_up,
909 a12p, inc_ap,
910 y21, rs_Y,
911 w21, inc_w );
912
913 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
914 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
915 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
916 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
917 // FLA_Copy( A22_l, a22l );
918 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
919 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
920 // FLA_Copy( g0, s01 );
922 n_behind,
923 m_behind,
924 n_ahead,
925 buff_m1,
926 A20, rs_A, cs_A,
927 Y20, rs_Y, cs_Y,
928 Z20, rs_Z, cs_Z,
929 A02, rs_A, cs_A,
930 A22, rs_A, cs_A,
931 tmp21, inc_tmp,
932 s01, rs_S,
933 a12p, inc_ap,
934 w21, inc_w,
935 a22l, inc_al );
936
937 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
939 n_ahead,
940 buff_1,
941 a12t, cs_A,
942 y21, rs_Y );
943
944 // FLA_Househ2s_UT( FLA_RIGHT,
945 // a12p_t,
946 // a12p_b,
947 // alpha12, psi11_minus_alpha12, sigma11 );
949 a12p_t,
950 a12p_b, inc_ap,
951 &alpha12,
953 sigma11 );
954
955 // FLA_Copy( a12p, v21 );
956 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
957 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
958 // FLA_Conjugate( v21_b );
960 n_ahead,
961 a12p, inc_ap,
962 v21, inc_v );
965 n_ahead,
967 v21, inc_v );
968 bl1_dconjv( n_ahead - 1,
969 v21_b, inc_v );
970
971 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
972 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
975
976 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
977 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
979 n_behind,
981 A02, rs_A,
982 s01, rs_S );
984 n_behind,
986 s01, rs_S );
987
988 // FLA_Copy( alpha12, a12t_l );
989 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
990 *a12t_l = alpha12;
992 n_ahead - 1,
993 v21_b, inc_v,
994 a12t_r, cs_A );
995 }
996
997 // FLA_Copy( u21p, u21 );
999 m_ahead,
1000 u21p, inc_up,
1001 u21, inc_u );
1002
1003 if ( n_ahead > 0 )
1004 {
1005 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1006 // FLA_Scal( FLA_MINUS_ONE, beta );
1007 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1009 n_ahead,
1010 y21, rs_Y,
1011 v21, inc_v,
1012 &beta );
1014
1015 // FLA_Copy( w21, z21 );
1016 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1017 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1018 // FLA_Axpy( beta, u21, z21 );
1020 m_ahead,
1021 w21, inc_w,
1022 z21, rs_Z );
1024 m_ahead,
1026 a22l, inc_al,
1027 z21, rs_Z );
1029 m_ahead,
1031 z21, rs_Z );
1033 m_ahead,
1034 &beta,
1035 u21, inc_u,
1036 z21, rs_Z );
1037
1038 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1039 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1041 n_ahead,
1042 tau11,
1043 y21, rs_Y );
1045 m_ahead,
1046 sigma11,
1047 z21, rs_Z );
1048 }
1049 else // if ( n_ahead == 0 )
1050 {
1051 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1052 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1054 n_behind,
1055 a10t, cs_A,
1056 t01, rs_T );
1059 m_ahead,
1060 n_behind,
1061 buff_1,
1062 A20, rs_A, cs_A,
1063 u21, inc_u,
1064 buff_1,
1065 t01, rs_T );
1066 }
1067
1068 /*------------------------------------------------------------*/
1069
1070 }
1071
1072 // FLA_Obj_free( &w );
1073 // FLA_Obj_free( &al );
1074 // FLA_Obj_free( &ap );
1075 // FLA_Obj_free( &u );
1076 // FLA_Obj_free( &up );
1077 // FLA_Obj_free( &v );
1078 // FLA_Obj_free( &d );
1079 // FLA_Obj_free( &e );
1080 FLA_free( buff_tmp );
1081 FLA_free( buff_w );
1082 FLA_free( buff_al );
1083 FLA_free( buff_ap );
1084 FLA_free( buff_u );
1085 FLA_free( buff_up );
1086 FLA_free( buff_v );
1087 FLA_free( buff_d );
1088 FLA_free( buff_e );
1089
1090 return FLA_SUCCESS;
1091}
FLA_Error FLA_Fused_UYx_ZVx_opd_var1(int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:331
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofs_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T,
float * buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
124
125 float beta;
126 int i;
127
128 // b_alg = FLA_Obj_length( T );
129 int b_alg = m_TS;
130
131 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135 float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137 int inc_v = 1;
138 int inc_y = 1;
139 int inc_z = 1;
140
141 for ( i = 0; i < b_alg; ++i )
142 {
143 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150
151 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153
154 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156
157 float* v21 = buff_v + (i+1)*inc_v;
158
159 float* y21 = buff_y + (i+1)*inc_y;
160
161 float* z21 = buff_z + (i+1)*inc_z;
162
163 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165
166 float* v21_t = v21 + (0 )*inc_v;
167 float* v21_b = v21 + (1 )*inc_v;
168
169 int m_ahead = m_A - i - 1;
170 int n_ahead = n_A - i - 1;
171 int m_behind = i;
172 int n_behind = i;
173
174 /*------------------------------------------------------------*/
175
176 // FLA_Househ2_UT( FLA_LEFT,
177 // alpha11,
178 // a21, tau11 );
180 alpha11,
181 a21, rs_A,
182 tau11 );
183
184 if ( n_ahead > 0 )
185 {
186 // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
187 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
189 n_ahead,
190 a12t, cs_A,
191 y21, inc_y );
194 m_ahead,
195 n_ahead,
196 buff_1,
197 A22, rs_A, cs_A,
198 a21, rs_A,
199 buff_1,
200 y21, inc_y );
201
202 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
204 n_ahead,
205 tau11,
206 y21, inc_y );
207
208 // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
210 n_ahead,
211 buff_m1,
212 y21, inc_y,
213 a12t, cs_A );
214
215 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
217 a12t_l,
218 a12t_r, cs_A,
219 sigma11 );
220
221 // FLA_Set( FLA_ONE, v21_t );
222 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223 *v21_t = *buff_1;
225 n_ahead - 1,
226 a12t_r, cs_A,
227 v21_b, inc_y );
228
229 // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
230 // FLA_Scal( FLA_MINUS_ONE, beta );
232 n_ahead,
233 v21, inc_v,
234 y21, inc_y,
235 &beta );
236 bl1_sneg1( &beta );
237
238 // FLA_Copy( a21, z21 );
239 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
242 m_ahead,
243 a21, rs_A,
244 z21, inc_z );
247 m_ahead,
248 n_ahead,
249 buff_1,
250 A22, rs_A, cs_A,
251 v21, inc_v,
252 &beta,
253 z21, inc_z );
255 m_ahead,
256 sigma11,
257 z21, inc_z );
258
259 // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
260 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
262 n_ahead,
263 buff_m1,
264 a21, rs_A,
265 y21, inc_y,
266 z21, inc_z,
267 v21, inc_v,
268 A22, rs_A, cs_A );
269
270 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
273 m_behind,
274 n_ahead,
275 buff_1,
276 A02, rs_A, cs_A,
277 v21, inc_v,
278 buff_0,
279 s01, rs_S );
280 }
281
282 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
283 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
285 n_behind,
286 a10t, cs_A,
287 t01, rs_T );
290 m_ahead,
291 n_behind,
292 buff_1,
293 A20, rs_A, cs_A,
294 a21, rs_A,
295 buff_1,
296 t01, rs_T );
297
298 /*------------------------------------------------------------*/
299
300 }
301
302 // FLA_Obj_free( &v );
303 // FLA_Obj_free( &y );
304 // FLA_Obj_free( &z );
305 FLA_free( buff_v );
306 FLA_free( buff_y );
307 FLA_free( buff_z );
308
309 return FLA_SUCCESS;
310}
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:651
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofs_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T,
float * buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
124
125 float alpha12;
126 float minus_conj_alpha12;
128 float minus_inv_tau11;
129 float minus_upsilon11;
130 float minus_conj_nu11;
131 float minus_conj_psi11;
132 float minus_zeta11;
133 float beta;
134 int i;
135
136 // b_alg = FLA_Obj_length( T );
137 int b_alg = m_TS;
138
139 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
140 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
141 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
142 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
143 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
144 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
145 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
146 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
147 float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
148 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149 float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
150 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
151 float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
152 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
153 int inc_w = 1;
154 int inc_ap = 1;
155 int inc_u = 1;
156 int inc_up = 1;
157 int inc_v = 1;
158 int inc_y = 1;
159 int inc_z = 1;
160
161 for ( i = 0; i < b_alg; ++i )
162 {
163 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170
171 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
172 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
173
174 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
175 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
176
177 float* w21 = buff_w + (i+1)*inc_w;
178
179 float* a12p = buff_ap + (i+1)*inc_ap;
180
181 float* upsilon11 = buff_u + (i )*inc_u;
182 float* u21 = buff_u + (i+1)*inc_u;
183
184 float* u21p = buff_up + (i+1)*inc_up;
185
186 float* nu11 = buff_v + (i )*inc_v;
187 float* v21 = buff_v + (i+1)*inc_v;
188
189 float* psi11 = buff_y + (i )*inc_y;
190 float* y21 = buff_y + (i+1)*inc_y;
191
192 float* zeta11 = buff_z + (i )*inc_z;
193 float* z21 = buff_z + (i+1)*inc_z;
194
195 float* a12p_t = a12p + (0 )*inc_ap;
196 float* a12p_b = a12p + (1 )*inc_ap;
197
198 float* v21_t = v21 + (0 )*inc_v;
199 float* v21_b = v21 + (1 )*inc_v;
200
201 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
202 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
203
204 float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
205
206 int m_ahead = m_A - i - 1;
207 int n_ahead = n_A - i - 1;
208 int m_behind = i;
209 int n_behind = i;
210
211 /*------------------------------------------------------------*/
212
213 if ( m_behind > 0 )
214 {
215 // FLA_Copy( upsilon11, minus_upsilon11 );
216 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
218
219 // FLA_Copy( zeta11, minus_zeta11 );
220 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
222
223 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
224 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
227
228 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
229 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
232
233 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_upsilon11, psi11, alpha11 );
234 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_zeta11, nu11, alpha11 );
236 1,
238 psi11, 1,
239 alpha11, 1 );
241 1,
243 nu11, 1,
244 alpha11, 1 );
245
246 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
247 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
249 m_ahead,
251 u21, inc_u,
252 a21, rs_A );
254 m_ahead,
256 z21, inc_z,
257 a21, rs_A );
258
259 // FLA_Axpyt( FLA_TRANSPOSE, minus_upsilon11, y21, a12t );
260 // FLA_Axpyt( FLA_TRANSPOSE, minus_zeta11, v21, a12t );
262 n_ahead,
264 y21, inc_y,
265 a12t, cs_A );
267 n_ahead,
269 v21, inc_v,
270 a12t, cs_A );
271 }
272
273 // FLA_Househ2_UT( FLA_LEFT,
274 // alpha11,
275 // a21, tau11 );
276 // FLA_Copy( a21, u21p );
278 alpha11,
279 a21, rs_A,
280 tau11 );
282 m_ahead,
283 a21, rs_A,
284 u21p, inc_up );
285
286 if ( n_ahead > 0 )
287 {
288 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
289 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
291
292 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
293 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
295 n_ahead,
296 a12t, cs_A,
297 a12p, inc_ap );
299 n_ahead,
301 a12t, cs_A,
302 a12p, inc_ap );
303 }
304
305 if ( m_behind > 0 && n_ahead > 0 )
306 {
307 // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
308 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
309 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
310 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
311 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
313 n_ahead,
314 tau11,
315 buff_m1,
316 u21, inc_u,
317 y21, inc_y,
318 z21, inc_z,
319 v21, inc_v,
320 A22, rs_A, cs_A,
321 u21p, inc_up,
322 a12p, inc_ap,
323 w21, inc_w );
324
325
326 }
327 else if ( n_ahead > 0 )
328 {
329 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
330 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
331 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
333 n_ahead,
334 tau11,
335 buff_0,
336 A22, rs_A, cs_A,
337 u21p, inc_up,
338 a12p, inc_ap,
339 y21, inc_y,
340 w21, inc_w );
341 }
342
343 if ( n_ahead > 0 )
344 {
345 // FLA_Axpyt( FLA_TRANSPOSE, FLA_ONE, a12t, y21 );
347 n_ahead,
348 buff_1,
349 a12t, cs_A,
350 y21, inc_y );
351
352 // FLA_Househ2s_UT( FLA_RIGHT,
353 // a12p_t,
354 // a12p_b,
355 // alpha12, psi11_minus_alpha12, sigma11 );
357 a12p_t,
358 a12p_b, inc_ap,
359 &alpha12,
361 sigma11 );
362
363 // FLA_Copy( a12p, v21 );
364 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
365 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
367 n_ahead,
368 a12p, inc_ap,
369 v21, inc_v );
372 n_ahead,
374 v21, inc_v );
375
376 // FLA_Copy( alpha12, a12t_l );
377 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
378 *a12t_l = alpha12;
380 n_ahead - 1,
381 v21_b, inc_v,
382 a12t_r, cs_A );
383 }
384
385 // FLA_Copy( u21p, u21 );
387 m_ahead,
388 u21p, inc_up,
389 u21, inc_u );
390
391 if ( n_ahead > 0 )
392 {
393 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
394 // FLA_Scal( FLA_MINUS_ONE, beta );
395 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
397 n_ahead,
398 y21, inc_y,
399 v21, inc_v,
400 &beta );
402
403 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
404 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
407
408 // FLA_Copy( w21, z21 );
409 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
410 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
411 // FLA_Axpy( beta, u21, z21 );
413 m_ahead,
414 w21, inc_w,
415 z21, inc_z );
417 m_ahead,
419 A22_l, rs_A,
420 z21, inc_z );
422 m_ahead,
424 z21, inc_z );
426 m_ahead,
427 &beta,
428 u21, inc_u,
429 z21, inc_z );
430
431 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
432 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
434 n_ahead,
435 tau11,
436 y21, inc_y );
438 m_ahead,
439 sigma11,
440 z21, inc_z );
441
442 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
445 m_behind,
446 n_ahead,
447 buff_1,
448 A02, rs_A, cs_A,
449 v21, inc_v,
450 buff_0,
451 s01, rs_S );
452 }
453
454 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
455 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
457 n_behind,
458 a10t, cs_A,
459 t01, rs_T );
462 m_ahead,
463 n_behind,
464 buff_1,
465 A20, rs_A, cs_A,
466 u21, inc_u,
467 buff_1,
468 t01, rs_T );
469
470 if ( m_behind + 1 == b_alg && n_ahead > 0 )
471 {
472 // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
473 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
476 m_ahead,
477 n_ahead,
478 buff_m1,
479 u21, inc_u,
480 y21, inc_y,
481 A22, rs_A, cs_A );
484 m_ahead,
485 n_ahead,
486 buff_m1,
487 z21, inc_z,
488 v21, inc_v,
489 A22, rs_A, cs_A );
490 }
491
492 /*------------------------------------------------------------*/
493
494 }
495
496 // FLA_Obj_free( &w );
497 // FLA_Obj_free( &ap );
498 // FLA_Obj_free( &u );
499 // FLA_Obj_free( &up );
500 // FLA_Obj_free( &v );
501 // FLA_Obj_free( &y );
502 // FLA_Obj_free( &z );
503 FLA_free( buff_w );
504 FLA_free( buff_ap );
505 FLA_free( buff_u );
506 FLA_free( buff_up );
507 FLA_free( buff_v );
508 FLA_free( buff_y );
509 FLA_free( buff_z );
510
511 return FLA_SUCCESS;
512}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:170
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition FLA_Househ2s_UT.c:555
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofs_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T,
float * buff_S,
int  rs_S,
int  cs_S 
)
163{
164 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
167
168 float alpha12;
169 float minus_conj_alpha12;
171 float minus_inv_tau11;
172 float beta;
173 float last_elem;
174 int i;
175
176 // b_alg = FLA_Obj_length( T );
177 int b_alg = m_TS;
178
179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189 float* buff_tmp = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
190 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191 float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
192 float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
193 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194 float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
195 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196 float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197 float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
198 int inc_tmp = 1;
199 int inc_w = 1;
200 int inc_al = 1;
201 int inc_ap = 1;
202 int inc_u = 1;
203 int inc_up = 1;
204 int inc_v = 1;
205 int inc_d = 1;
206 int inc_e = 1;
207
208 // FLA_Set( FLA_ZERO, Y );
209 // FLA_Set( FLA_ZERO, Z );
210 bl1_ssetm( n_A,
211 b_alg,
212 buff_0,
213 buff_Y, rs_Y, cs_Y );
214 bl1_ssetm( m_A,
215 b_alg,
216 buff_0,
217 buff_Z, rs_Z, cs_Z );
218
219 for ( i = 0; i < b_alg; ++i )
220 {
221 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
222 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
223 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
224 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
225 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
226 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
227 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
228 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
229
230 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
231 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
232 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
233
234 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
235 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
236 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
237
238 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
239 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
240
241 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
242 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
243
244 float* tmp21 = buff_tmp + (i+1)*inc_tmp;
245
246 float* w21 = buff_w + (i+1)*inc_w;
247
248 float* a22l = buff_al + (i+1)*inc_al;
249
250 float* a12p = buff_ap + (i+1)*inc_ap;
251
252 float* u21 = buff_u + (i+1)*inc_u;
253
254 float* u21p = buff_up + (i+1)*inc_up;
255
256 float* v21 = buff_v + (i+1)*inc_v;
257
258 float* d0 = buff_d + (0 )*inc_d;
259
260 float* e0 = buff_e + (0 )*inc_e;
261
262 float* a12p_t = a12p + (0 )*inc_ap;
263 float* a12p_b = a12p + (1 )*inc_ap;
264
265 float* v21_t = v21 + (0 )*inc_v;
266 float* v21_b = v21 + (1 )*inc_v;
267
268 float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
269
270 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
271 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
272
273 float* ABL = a10t;
274 float* ZBL = z10t;
275
276 float* a2 = alpha11;
277
278 int m_ahead = m_A - i - 1;
279 int n_ahead = n_A - i - 1;
280 int m_behind = i;
281 int n_behind = i;
282
283 /*------------------------------------------------------------*/
284
285 if ( m_behind > 0 )
286 {
287 // FLA_Copy( a01_b, last_elem );
288 // FLA_Set( FLA_ONE, a01_b );
289 last_elem = *a01_b;
290 *a01_b = *buff_1;
291 }
292
293 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
294 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
297 m_ahead + 1,
298 n_behind,
299 buff_m1,
300 ABL, rs_A, cs_A,
301 y10t, cs_Y,
302 buff_1,
303 a2, rs_A );
306 m_ahead + 1,
307 n_behind,
308 buff_m1,
309 ZBL, rs_Z, cs_Z,
310 a01, rs_A,
311 buff_1,
312 a2, rs_A );
313
314 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
315 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
318 n_ahead,
319 n_behind,
320 buff_m1,
321 Y20, rs_Y, cs_Y,
322 a10t, cs_A,
323 buff_1,
324 a12t, cs_A );
327 m_behind,
328 n_ahead,
329 buff_m1,
330 A02, rs_A, cs_A,
331 z10t, cs_Z,
332 buff_1,
333 a12t, cs_A );
334
335 if ( m_behind > 0 )
336 {
337 // FLA_Copy( last_elem, a01_b );
338 *a01_b = last_elem;
339 }
340
341 // FLA_Househ2_UT( FLA_LEFT,
342 // alpha11,
343 // a21, tau11 );
344 // FLA_Copy( a21, u21p );
346 alpha11,
347 a21, rs_A,
348 tau11 );
350 m_ahead,
351 a21, rs_A,
352 u21p, inc_up );
353
354 if ( n_ahead > 0 )
355 {
356 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
357 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
359
360 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
361 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
363 n_ahead,
364 a12t, cs_A,
365 a12p, inc_ap );
367 n_ahead,
369 a12t, cs_A,
370 a12p, inc_ap );
371
372 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
373 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
376 m_ahead,
377 n_behind,
378 buff_1,
379 A20, rs_A, cs_A,
380 u21p, inc_up,
381 buff_0,
382 d0, inc_d );
385 m_ahead,
386 n_behind,
387 buff_1,
388 Z20, rs_Z, cs_Z,
389 u21p, inc_up,
390 buff_0,
391 e0, inc_e );
392
393 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
394 // FLA_Axpy( FLA_ONE, d0, t01 );
396 n_behind,
397 a10t, cs_A,
398 t01, rs_T );
400 n_behind,
401 buff_1,
402 d0, inc_d,
403 t01, rs_T );
404
405 // FLA_Set( FLA_ZERO, y21 );
406 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
407 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
409 buff_0,
410 y21, rs_Y );
413 n_ahead,
414 n_behind,
415 buff_m1,
416 Y20, rs_Y, cs_Y,
417 d0, inc_d,
418 buff_1,
419 y21, rs_Y );
422 m_behind,
423 n_ahead,
424 buff_m1,
425 A02, rs_A, cs_A,
426 e0, inc_e,
427 buff_1,
428 y21, rs_Y );
429
430 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
431 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
432 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
434 n_ahead,
435 tau11,
436 buff_1,
437 A22, rs_A, cs_A,
438 u21p, inc_up,
439 a12p, inc_ap,
440 y21, rs_Y,
441 w21, inc_w );
442
443 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
444 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
445 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
446 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
447 // FLA_Copy( A22_l, a22l );
448 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
449 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
450 // FLA_Copy( g0, s01 );
452 n_behind,
453 m_behind,
454 n_ahead,
455 buff_m1,
456 A20, rs_A, cs_A,
457 Y20, rs_Y, cs_Y,
458 Z20, rs_Z, cs_Z,
459 A02, rs_A, cs_A,
460 A22, rs_A, cs_A,
461 tmp21, inc_tmp,
462 s01, rs_S,
463 a12p, inc_ap,
464 w21, inc_w,
465 a22l, inc_al );
466
467 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
469 n_ahead,
470 buff_1,
471 a12t, cs_A,
472 y21, rs_Y );
473
474 // FLA_Househ2s_UT( FLA_RIGHT,
475 // a12p_t,
476 // a12p_b,
477 // alpha12, psi11_minus_alpha12, sigma11 );
479 a12p_t,
480 a12p_b, inc_ap,
481 &alpha12,
483 sigma11 );
484
485 // FLA_Copy( a12p, v21 );
486 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
487 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
488 // FLA_Conjugate( v21_b );
490 n_ahead,
491 a12p, inc_ap,
492 v21, inc_v );
495 n_ahead,
497 v21, inc_v );
498 bl1_sconjv( n_ahead - 1,
499 v21_b, inc_v );
500
501 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
502 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
505
506 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
507 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
509 n_behind,
511 A02, rs_A,
512 s01, rs_S );
514 n_behind,
516 s01, rs_S );
517
518 // FLA_Copy( alpha12, a12t_l );
519 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
520 *a12t_l = alpha12;
522 n_ahead - 1,
523 v21_b, inc_v,
524 a12t_r, cs_A );
525 }
526
527 // FLA_Copy( u21p, u21 );
529 m_ahead,
530 u21p, inc_up,
531 u21, inc_u );
532
533 if ( n_ahead > 0 )
534 {
535 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
536 // FLA_Scal( FLA_MINUS_ONE, beta );
537 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
539 n_ahead,
540 y21, rs_Y,
541 v21, inc_v,
542 &beta );
544
545 // FLA_Copy( w21, z21 );
546 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
547 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
548 // FLA_Axpy( beta, u21, z21 );
550 m_ahead,
551 w21, inc_w,
552 z21, rs_Z );
554 m_ahead,
556 a22l, inc_al,
557 z21, rs_Z );
559 m_ahead,
561 z21, rs_Z );
563 m_ahead,
564 &beta,
565 u21, inc_u,
566 z21, rs_Z );
567
568 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
569 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
571 n_ahead,
572 tau11,
573 y21, rs_Y );
575 m_ahead,
576 sigma11,
577 z21, rs_Z );
578 }
579 else // if ( n_ahead == 0 )
580 {
581 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
582 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
584 n_behind,
585 a10t, cs_A,
586 t01, rs_T );
589 m_ahead,
590 n_behind,
591 buff_1,
592 A20, rs_A, cs_A,
593 u21, inc_u,
594 buff_1,
595 t01, rs_T );
596 }
597
598 /*------------------------------------------------------------*/
599
600 }
601
602 // FLA_Obj_free( &w );
603 // FLA_Obj_free( &al );
604 // FLA_Obj_free( &ap );
605 // FLA_Obj_free( &u );
606 // FLA_Obj_free( &up );
607 // FLA_Obj_free( &v );
608 // FLA_Obj_free( &d );
609 // FLA_Obj_free( &e );
611 FLA_free( buff_w );
612 FLA_free( buff_al );
613 FLA_free( buff_ap );
614 FLA_free( buff_u );
615 FLA_free( buff_up );
616 FLA_free( buff_v );
617 FLA_free( buff_d );
618 FLA_free( buff_e );
619
620 return FLA_SUCCESS;
621}
FLA_Error FLA_Fused_UYx_ZVx_ops_var1(int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:201
void bl1_sconjv(int m, float *x, int incx)
Definition bl1_conjv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
31
34
37
40
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
51 n_A,
52 m_TS,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
67 n_A,
68 m_TS,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
81
83 n_A,
84 m_TS,
87 buff_S, rs_S, cs_S );
88
89 break;
90 }
91
93 {
97
99 n_A,
100 m_TS,
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:114
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:514
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:714
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:314
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
Definition blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blf_var2(), and FLA_Bidiag_UT_u_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
31
34
37
40
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
51 n_A,
52 m_TS,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
67 n_A,
68 m_TS,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
81
83 n_A,
84 m_TS,
87 buff_S, rs_S, cs_S );
88
89 break;
90 }
91
93 {
97
99 n_A,
100 m_TS,
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:516
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:1326
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:114
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:921

References FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blf_var3(), and FLA_Bidiag_UT_u_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)
36{
37 FLA_Datatype datatype;
38 int m_A, n_A, m_TS;
39 int rs_A, cs_A;
40 int rs_Y, cs_Y;
41 int rs_Z, cs_Z;
42 int rs_T, cs_T;
43 int rs_S, cs_S;
44
45 datatype = FLA_Obj_datatype( A );
46
47 m_A = FLA_Obj_length( A );
48 n_A = FLA_Obj_width( A );
50
53
56
59
62
65
66
67 switch ( datatype )
68 {
69 case FLA_FLOAT:
70 {
71 float* buff_A = FLA_FLOAT_PTR( A );
72 float* buff_Y = FLA_FLOAT_PTR( Y );
73 float* buff_Z = FLA_FLOAT_PTR( Z );
74 float* buff_T = FLA_FLOAT_PTR( T );
75 float* buff_S = FLA_FLOAT_PTR( S );
76
78 n_A,
79 m_TS,
84 buff_S, rs_S, cs_S );
85
86 break;
87 }
88
89 case FLA_DOUBLE:
90 {
91 double* buff_A = FLA_DOUBLE_PTR( A );
92 double* buff_Y = FLA_DOUBLE_PTR( Y );
93 double* buff_Z = FLA_DOUBLE_PTR( Z );
94 double* buff_T = FLA_DOUBLE_PTR( T );
95 double* buff_S = FLA_DOUBLE_PTR( S );
96
98 n_A,
99 m_TS,
100 buff_A, rs_A, cs_A,
101 buff_Y, rs_Y, cs_Y,
102 buff_Z, rs_Z, cs_Z,
103 buff_T, rs_T, cs_T,
104 buff_S, rs_S, cs_S );
105
106 break;
107 }
108
109 case FLA_COMPLEX:
110 {
116
118 n_A,
119 m_TS,
120 buff_A, rs_A, cs_A,
121 buff_Y, rs_Y, cs_Y,
122 buff_Z, rs_Z, cs_Z,
123 buff_T, rs_T, cs_T,
124 buff_S, rs_S, cs_S );
125
126 break;
127 }
128
130 {
136
138 n_A,
139 m_TS,
140 buff_A, rs_A, cs_A,
141 buff_Y, rs_Y, cs_Y,
142 buff_Z, rs_Z, cs_Z,
143 buff_T, rs_T, cs_T,
144 buff_S, rs_S, cs_S );
145
146 break;
147 }
148 }
149
150 return FLA_SUCCESS;
151}
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:1565
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:155
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:1095
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:625

References FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blf_var4(), and FLA_Bidiag_UT_u_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofz_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
720{
724
726 int i;
727
728 // b_alg = FLA_Obj_length( T );
729 int b_alg = m_TS;
730
731 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
732 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
733 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
734 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
735 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
736 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
737 int inc_v = 1;
738 int inc_y = 1;
739 int inc_z = 1;
740
741 for ( i = 0; i < b_alg; ++i )
742 {
743 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
744 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
745 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
746 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
747 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
748 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
749 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
750
751 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
752 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
753
754 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
755 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
756
757 dcomplex* v21 = buff_v + (i+1)*inc_v;
758
759 dcomplex* y21 = buff_y + (i+1)*inc_y;
760
761 dcomplex* z21 = buff_z + (i+1)*inc_z;
762
763 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
764 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
765
766 dcomplex* v21_t = v21 + (0 )*inc_v;
767 dcomplex* v21_b = v21 + (1 )*inc_v;
768
769 int m_ahead = m_A - i - 1;
770 int n_ahead = n_A - i - 1;
771 int m_behind = i;
772 int n_behind = i;
773
774 /*------------------------------------------------------------*/
775
776 // FLA_Househ2_UT( FLA_LEFT,
777 // alpha11,
778 // a21, tau11 );
780 alpha11,
781 a21, rs_A,
782 tau11 );
783
784 if ( n_ahead > 0 )
785 {
786 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
787 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
789 n_ahead,
790 a12t, cs_A,
791 y21, inc_y );
794 m_ahead,
795 n_ahead,
796 buff_1,
797 A22, rs_A, cs_A,
798 a21, rs_A,
799 buff_1,
800 y21, inc_y );
801
802 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
804 n_ahead,
805 tau11,
806 y21, inc_y );
807
808 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
810 n_ahead,
811 buff_m1,
812 y21, inc_y,
813 a12t, cs_A );
814
815 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
817 a12t_l,
818 a12t_r, cs_A,
819 sigma11 );
820
821 // FLA_Set( FLA_ONE, v21_t );
822 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
823 *v21_t = *buff_1;
825 n_ahead - 1,
826 a12t_r, cs_A,
827 v21_b, inc_y );
828
829 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
830 // FLA_Scal( FLA_MINUS_ONE, beta );
832 n_ahead,
833 y21, inc_y,
834 v21, inc_v,
835 &beta );
836 bl1_zneg1( &beta );
837
838 // FLA_Copy( a21, z21 );
839 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
840 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
842 m_ahead,
843 a21, rs_A,
844 z21, inc_z );
847 m_ahead,
848 n_ahead,
849 buff_1,
850 A22, rs_A, cs_A,
851 v21, inc_v,
852 &beta,
853 z21, inc_z );
855 m_ahead,
856 sigma11,
857 z21, inc_z );
858
859 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
860 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
862 n_ahead,
863 buff_m1,
864 a21, rs_A,
865 y21, inc_y,
866 z21, inc_z,
867 v21, inc_v,
868 A22, rs_A, cs_A );
869
870 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
873 m_behind,
874 n_ahead,
875 buff_1,
876 A02, rs_A, cs_A,
877 v21, inc_v,
878 buff_0,
879 s01, rs_S );
880 }
881
882 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
883 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
885 n_behind,
886 a10t, cs_A,
887 t01, rs_T );
890 m_ahead,
891 n_behind,
892 buff_1,
893 A20, rs_A, cs_A,
894 a21, rs_A,
895 buff_1,
896 t01, rs_T );
897
898 /*------------------------------------------------------------*/
899
900 }
901
902 // FLA_Obj_free( &v );
903 // FLA_Obj_free( &y );
904 // FLA_Obj_free( &z );
905 FLA_free( buff_v );
906 FLA_free( buff_y );
907 FLA_free( buff_z );
908
909 return FLA_SUCCESS;
910}
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:693
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofz_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
1332{
1336
1345 dcomplex beta;
1346 int i;
1347
1348 // b_alg = FLA_Obj_length( T );
1349 int b_alg = m_TS;
1350
1351 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1352 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1353 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1354 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1355 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1356 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
1357 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1358 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1359 dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1360 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1361 dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1362 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1363 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1364 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1365 int inc_w = 1;
1366 int inc_ap = 1;
1367 int inc_u = 1;
1368 int inc_up = 1;
1369 int inc_v = 1;
1370 int inc_y = 1;
1371 int inc_z = 1;
1372
1373 for ( i = 0; i < b_alg; ++i )
1374 {
1375 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1376 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1377 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1378 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1379 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1380 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1381 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1382
1383 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1384 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1385
1386 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1387 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1388
1389 dcomplex* w21 = buff_w + (i+1)*inc_w;
1390
1391 dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1392
1394 dcomplex* u21 = buff_u + (i+1)*inc_u;
1395
1396 dcomplex* u21p = buff_up + (i+1)*inc_up;
1397
1398 dcomplex* nu11 = buff_v + (i )*inc_v;
1399 dcomplex* v21 = buff_v + (i+1)*inc_v;
1400
1401 dcomplex* psi11 = buff_y + (i )*inc_y;
1402 dcomplex* y21 = buff_y + (i+1)*inc_y;
1403
1404 dcomplex* zeta11 = buff_z + (i )*inc_z;
1405 dcomplex* z21 = buff_z + (i+1)*inc_z;
1406
1407 dcomplex* a12p_t = a12p + (0 )*inc_ap;
1408 dcomplex* a12p_b = a12p + (1 )*inc_ap;
1409
1410 dcomplex* v21_t = v21 + (0 )*inc_v;
1411 dcomplex* v21_b = v21 + (1 )*inc_v;
1412
1413 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1414 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1415
1416 dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1417
1418 int m_ahead = m_A - i - 1;
1419 int n_ahead = n_A - i - 1;
1420 int m_behind = i;
1421 int n_behind = i;
1422
1423 /*------------------------------------------------------------*/
1424
1425 if ( m_behind > 0 )
1426 {
1427 // FLA_Copy( upsilon11, minus_upsilon11 );
1428 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1430
1431 // FLA_Copy( zeta11, minus_zeta11 );
1432 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1434
1435 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1436 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1439
1440 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1441 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1444
1445 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1446 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1448 1,
1450 upsilon11, 1,
1451 alpha11, 1 );
1453 1,
1455 zeta11, 1,
1456 alpha11, 1 );
1457
1458 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1459 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1461 m_ahead,
1463 u21, inc_u,
1464 a21, rs_A );
1466 m_ahead,
1468 z21, inc_z,
1469 a21, rs_A );
1470
1471 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1472 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1474 n_ahead,
1476 y21, inc_y,
1477 a12t, cs_A );
1479 n_ahead,
1480 &minus_zeta11,
1481 v21, inc_v,
1482 a12t, cs_A );
1483 }
1484
1485 // FLA_Househ2_UT( FLA_LEFT,
1486 // alpha11,
1487 // a21, tau11 );
1488 // FLA_Copy( a21, u21p );
1490 alpha11,
1491 a21, rs_A,
1492 tau11 );
1494 m_ahead,
1495 a21, rs_A,
1496 u21p, inc_up );
1497
1498 if ( n_ahead > 0 )
1499 {
1500 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1501 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1503
1504 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1505 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1507 n_ahead,
1508 a12t, cs_A,
1509 a12p, inc_ap );
1511 n_ahead,
1513 a12t, cs_A,
1514 a12p, inc_ap );
1515 }
1516
1517 if ( m_behind > 0 && n_ahead > 0 )
1518 {
1519 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1520 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1521 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1522 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1523 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1525 n_ahead,
1526 tau11,
1527 buff_m1,
1528 u21, inc_u,
1529 y21, inc_y,
1530 z21, inc_z,
1531 v21, inc_v,
1532 A22, rs_A, cs_A,
1533 u21p, inc_up,
1534 a12p, inc_ap,
1535 w21, inc_w );
1536
1537
1538 }
1539 else if ( n_ahead > 0 )
1540 {
1541 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1542 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1543 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1545 n_ahead,
1546 tau11,
1547 buff_0,
1548 A22, rs_A, cs_A,
1549 u21p, inc_up,
1550 a12p, inc_ap,
1551 y21, inc_y,
1552 w21, inc_w );
1553 }
1554
1555 if ( n_ahead > 0 )
1556 {
1557 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1559 n_ahead,
1560 buff_1,
1561 a12t, cs_A,
1562 y21, inc_y );
1563
1564 // FLA_Househ2s_UT( FLA_RIGHT,
1565 // a12p_t,
1566 // a12p_b,
1567 // alpha12, psi11_minus_alpha12, sigma11 );
1569 a12p_t,
1570 a12p_b, inc_ap,
1571 &alpha12,
1573 sigma11 );
1574
1575 // FLA_Copy( a12p, v21 );
1576 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1577 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1578 // FLA_Conjugate( v21_b );
1580 n_ahead,
1581 a12p, inc_ap,
1582 v21, inc_v );
1585 n_ahead,
1587 v21, inc_v );
1588 bl1_zconjv( n_ahead - 1,
1589 v21_b, inc_v );
1590
1591 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1592 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1593 *a12t_l = alpha12;
1595 n_ahead - 1,
1596 v21_b, inc_v,
1597 a12t_r, cs_A );
1598 }
1599
1600 // FLA_Copy( u21p, u21 );
1602 m_ahead,
1603 u21p, inc_up,
1604 u21, inc_u );
1605
1606 if ( n_ahead > 0 )
1607 {
1608 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1609 // FLA_Scal( FLA_MINUS_ONE, beta );
1610 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1612 n_ahead,
1613 y21, inc_y,
1614 v21, inc_v,
1615 &beta );
1617
1618 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1619 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1622
1623 // FLA_Copy( w21, z21 );
1624 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1625 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1626 // FLA_Axpy( beta, u21, z21 );
1628 m_ahead,
1629 w21, inc_w,
1630 z21, inc_z );
1632 m_ahead,
1634 A22_l, rs_A,
1635 z21, inc_z );
1637 m_ahead,
1639 z21, inc_z );
1641 m_ahead,
1642 &beta,
1643 u21, inc_u,
1644 z21, inc_z );
1645
1646 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1647 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1649 n_ahead,
1650 tau11,
1651 y21, inc_y );
1653 m_ahead,
1654 sigma11,
1655 z21, inc_z );
1656
1657 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1660 m_behind,
1661 n_ahead,
1662 buff_1,
1663 A02, rs_A, cs_A,
1664 v21, inc_v,
1665 buff_0,
1666 s01, rs_S );
1667 }
1668
1669 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1670 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1672 n_behind,
1673 a10t, cs_A,
1674 t01, rs_T );
1677 m_ahead,
1678 n_behind,
1679 buff_1,
1680 A20, rs_A, cs_A,
1681 u21, inc_u,
1682 buff_1,
1683 t01, rs_T );
1684
1685 if ( m_behind + 1 == b_alg && n_ahead > 0 )
1686 {
1687 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1688 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1691 m_ahead,
1692 n_ahead,
1693 buff_m1,
1694 u21, inc_u,
1695 y21, inc_y,
1696 A22, rs_A, cs_A );
1699 m_ahead,
1700 n_ahead,
1701 buff_m1,
1702 z21, inc_z,
1703 v21, inc_v,
1704 A22, rs_A, cs_A );
1705 }
1706
1707 /*------------------------------------------------------------*/
1708
1709 }
1710
1711 // FLA_Obj_free( &w );
1712 // FLA_Obj_free( &ap );
1713 // FLA_Obj_free( &u );
1714 // FLA_Obj_free( &up );
1715 // FLA_Obj_free( &v );
1716 // FLA_Obj_free( &y );
1717 // FLA_Obj_free( &z );
1718 FLA_free( buff_w );
1719 FLA_free( buff_ap );
1720 FLA_free( buff_u );
1721 FLA_free( buff_up );
1722 FLA_free( buff_v );
1723 FLA_free( buff_y );
1724 FLA_free( buff_z );
1725
1726 return FLA_SUCCESS;
1727}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:523
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition FLA_Househ2s_UT.c:610
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition bl1_conjv.c:34
bl1_zscals(beta, rho_yz)
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), bl1_zscals(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofz_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)
1573{
1577
1582 dcomplex beta;
1584 int i;
1585
1586 // b_alg = FLA_Obj_length( T );
1587 int b_alg = m_TS;
1588
1589 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1590 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1591 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1592 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1593 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1594 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1595 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1596 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1597 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1598 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1599 dcomplex* buff_tmp = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1600 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1601 dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1602 dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1603 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1604 dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1605 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1606 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1607 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1608 int inc_tmp = 1;
1609 int inc_w = 1;
1610 int inc_al = 1;
1611 int inc_ap = 1;
1612 int inc_u = 1;
1613 int inc_up = 1;
1614 int inc_v = 1;
1615 int inc_d = 1;
1616 int inc_e = 1;
1617
1618 // FLA_Set( FLA_ZERO, Y );
1619 // FLA_Set( FLA_ZERO, Z );
1620 bl1_zsetm( n_A,
1621 b_alg,
1622 buff_0,
1623 buff_Y, rs_Y, cs_Y );
1624 bl1_zsetm( m_A,
1625 b_alg,
1626 buff_0,
1627 buff_Z, rs_Z, cs_Z );
1628
1629 for ( i = 0; i < b_alg; ++i )
1630 {
1631 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1632 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1633 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1634 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1635 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1636 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1637 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1638 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1639
1640 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1641 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1642 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1643
1644 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1645 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1646 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1647
1648 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1649 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1650
1651 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1652 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1653
1654 dcomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1655
1656 dcomplex* w21 = buff_w + (i+1)*inc_w;
1657
1658 dcomplex* a22l = buff_al + (i+1)*inc_al;
1659
1660 dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1661
1662 dcomplex* u21 = buff_u + (i+1)*inc_u;
1663
1664 dcomplex* u21p = buff_up + (i+1)*inc_up;
1665
1666 dcomplex* v21 = buff_v + (i+1)*inc_v;
1667
1668 dcomplex* d0 = buff_d + (0 )*inc_d;
1669
1670 dcomplex* e0 = buff_e + (0 )*inc_e;
1671
1672 dcomplex* a12p_t = a12p + (0 )*inc_ap;
1673 dcomplex* a12p_b = a12p + (1 )*inc_ap;
1674
1675 dcomplex* v21_t = v21 + (0 )*inc_v;
1676 dcomplex* v21_b = v21 + (1 )*inc_v;
1677
1678 dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1679
1680 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1681 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1682
1683 dcomplex* ABL = a10t;
1684 dcomplex* ZBL = z10t;
1685
1686 dcomplex* a2 = alpha11;
1687
1688 int m_ahead = m_A - i - 1;
1689 int n_ahead = n_A - i - 1;
1690 int m_behind = i;
1691 int n_behind = i;
1692
1693 /*------------------------------------------------------------*/
1694
1695 if ( m_behind > 0 )
1696 {
1697 // FLA_Copy( a01_b, last_elem );
1698 // FLA_Set( FLA_ONE, a01_b );
1699 last_elem = *a01_b;
1700 *a01_b = *buff_1;
1701 }
1702
1703 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1704 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1707 m_ahead + 1,
1708 n_behind,
1709 buff_m1,
1710 ABL, rs_A, cs_A,
1711 y10t, cs_Y,
1712 buff_1,
1713 a2, rs_A );
1716 m_ahead + 1,
1717 n_behind,
1718 buff_m1,
1719 ZBL, rs_Z, cs_Z,
1720 a01, rs_A,
1721 buff_1,
1722 a2, rs_A );
1723
1724 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1725 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1728 n_ahead,
1729 n_behind,
1730 buff_m1,
1731 Y20, rs_Y, cs_Y,
1732 a10t, cs_A,
1733 buff_1,
1734 a12t, cs_A );
1737 m_behind,
1738 n_ahead,
1739 buff_m1,
1740 A02, rs_A, cs_A,
1741 z10t, cs_Z,
1742 buff_1,
1743 a12t, cs_A );
1744
1745 if ( m_behind > 0 )
1746 {
1747 // FLA_Copy( last_elem, a01_b );
1748 *a01_b = last_elem;
1749 }
1750
1751 // FLA_Househ2_UT( FLA_LEFT,
1752 // alpha11,
1753 // a21, tau11 );
1754 // FLA_Copy( a21, u21p );
1756 alpha11,
1757 a21, rs_A,
1758 tau11 );
1760 m_ahead,
1761 a21, rs_A,
1762 u21p, inc_up );
1763
1764 if ( n_ahead > 0 )
1765 {
1766 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1767 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1769
1770 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1771 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1773 n_ahead,
1774 a12t, cs_A,
1775 a12p, inc_ap );
1777 n_ahead,
1779 a12t, cs_A,
1780 a12p, inc_ap );
1781
1782 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1783 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1786 m_ahead,
1787 n_behind,
1788 buff_1,
1789 A20, rs_A, cs_A,
1790 u21p, inc_up,
1791 buff_0,
1792 d0, inc_d );
1795 m_ahead,
1796 n_behind,
1797 buff_1,
1798 Z20, rs_Z, cs_Z,
1799 u21p, inc_up,
1800 buff_0,
1801 e0, inc_e );
1802
1803 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1804 // FLA_Axpy( FLA_ONE, d0, t01 );
1806 n_behind,
1807 a10t, cs_A,
1808 t01, rs_T );
1810 n_behind,
1811 buff_1,
1812 d0, inc_d,
1813 t01, rs_T );
1814
1815 // FLA_Set( FLA_ZERO, y21 );
1816 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1817 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1819 buff_0,
1820 y21, rs_Y );
1823 n_ahead,
1824 n_behind,
1825 buff_m1,
1826 Y20, rs_Y, cs_Y,
1827 d0, inc_d,
1828 buff_1,
1829 y21, rs_Y );
1832 m_behind,
1833 n_ahead,
1834 buff_m1,
1835 A02, rs_A, cs_A,
1836 e0, inc_e,
1837 buff_1,
1838 y21, rs_Y );
1839
1840 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1841 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1842 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1844 n_ahead,
1845 tau11,
1846 buff_1,
1847 A22, rs_A, cs_A,
1848 u21p, inc_up,
1849 a12p, inc_ap,
1850 y21, rs_Y,
1851 w21, inc_w );
1852
1853 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1854 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1855 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1856 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1857 // FLA_Copy( A22_l, a22l );
1858 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1859 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1860 // FLA_Copy( g0, s01 );
1862 n_behind,
1863 m_behind,
1864 n_ahead,
1865 buff_m1,
1866 A20, rs_A, cs_A,
1867 Y20, rs_Y, cs_Y,
1868 Z20, rs_Z, cs_Z,
1869 A02, rs_A, cs_A,
1870 A22, rs_A, cs_A,
1871 tmp21, inc_tmp,
1872 s01, rs_S,
1873 a12p, inc_ap,
1874 w21, inc_w,
1875 a22l, inc_al );
1876
1877 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1879 n_ahead,
1880 buff_1,
1881 a12t, cs_A,
1882 y21, rs_Y );
1883
1884 // FLA_Househ2s_UT( FLA_RIGHT,
1885 // a12p_t,
1886 // a12p_b,
1887 // alpha12, psi11_minus_alpha12, sigma11 );
1889 a12p_t,
1890 a12p_b, inc_ap,
1891 &alpha12,
1893 sigma11 );
1894
1895 // FLA_Copy( a12p, v21 );
1896 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1897 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1898 // FLA_Conjugate( v21_b );
1900 n_ahead,
1901 a12p, inc_ap,
1902 v21, inc_v );
1905 n_ahead,
1907 v21, inc_v );
1908 bl1_zconjv( n_ahead - 1,
1909 v21_b, inc_v );
1910
1911 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1912 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1915
1916 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1917 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1919 n_behind,
1921 A02, rs_A,
1922 s01, rs_S );
1924 n_behind,
1926 s01, rs_S );
1927
1928 // FLA_Copy( alpha12, a12t_l );
1929 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1930 *a12t_l = alpha12;
1932 n_ahead - 1,
1933 v21_b, inc_v,
1934 a12t_r, cs_A );
1935 }
1936
1937 // FLA_Copy( u21p, u21 );
1939 m_ahead,
1940 u21p, inc_up,
1941 u21, inc_u );
1942
1943 if ( n_ahead > 0 )
1944 {
1945 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1946 // FLA_Scal( FLA_MINUS_ONE, beta );
1947 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1949 n_ahead,
1950 y21, rs_Y,
1951 v21, inc_v,
1952 &beta );
1954
1955 // FLA_Copy( w21, z21 );
1956 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1957 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1958 // FLA_Axpy( beta, u21, z21 );
1960 m_ahead,
1961 w21, inc_w,
1962 z21, rs_Z );
1964 m_ahead,
1966 a22l, inc_al,
1967 z21, rs_Z );
1969 m_ahead,
1971 z21, rs_Z );
1973 m_ahead,
1974 &beta,
1975 u21, inc_u,
1976 z21, rs_Z );
1977
1978 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1979 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1981 n_ahead,
1982 tau11,
1983 y21, rs_Y );
1985 m_ahead,
1986 sigma11,
1987 z21, rs_Z );
1988 }
1989 else // if ( n_ahead == 0 )
1990 {
1991 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1992 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1993 bl1_zcopyv( BLIS1_CONJUGATE,
1994 n_behind,
1995 a10t, cs_A,
1996 t01, rs_T );
1997 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1998 BLIS1_NO_CONJUGATE,
1999 m_ahead,
2000 n_behind,
2001 buff_1,
2002 A20, rs_A, cs_A,
2003 u21, inc_u,
2004 buff_1,
2005 t01, rs_T );
2006 }
2007
2008 /*------------------------------------------------------------*/
2009
2010 }
2011
2012 // FLA_Obj_free( &w );
2013 // FLA_Obj_free( &al );
2014 // FLA_Obj_free( &ap );
2015 // FLA_Obj_free( &u );
2016 // FLA_Obj_free( &up );
2017 // FLA_Obj_free( &v );
2018 // FLA_Obj_free( &d );
2019 // FLA_Obj_free( &e );
2020 FLA_free( buff_tmp );
2021 FLA_free( buff_w );
2022 FLA_free( buff_al );
2023 FLA_free( buff_ap );
2024 FLA_free( buff_u );
2025 FLA_free( buff_up );
2026 FLA_free( buff_v );
2027 FLA_free( buff_d );
2028 FLA_free( buff_e );
2029
2030 return FLA_SUCCESS;
2031}
FLA_Error FLA_Fused_UYx_ZVx_opz_var1(int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:542
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

◆ FLA_Bidiag_UT_u_step_opc_var1()

FLA_Error FLA_Bidiag_UT_u_step_opc_var1 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
390{
393
394 int i;
395
396 // b_alg = FLA_Obj_length( T );
397 int b_alg = m_TS;
398
399 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
400 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
401 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
402 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
403 int inc_v = 1;
404
405 for ( i = 0; i < b_alg; ++i )
406 {
407 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
408 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
409 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
410 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
411 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
412 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
413 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
414
415 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
416 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
417
418 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
419 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
420
421 scomplex* v21 = buff_v + (i+1)*inc_v;
422
423 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
424 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
425
426 scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
427 scomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
428
429 scomplex* v21_t = v21 + (0 )*inc_v;
430 scomplex* v21_b = v21 + (1 )*inc_v;
431
432 int m_ahead = m_A - i - 1;
433 int n_ahead = n_A - i - 1;
434 int m_behind = i;
435 int n_behind = i;
436
437 /*------------------------------------------------------------*/
438
439 // FLA_Househ2_UT( FLA_LEFT,
440 // alpha11,
441 // a21, tau11 );
443 alpha11,
444 a21, rs_A,
445 tau11 );
446
447 if ( n_ahead > 0 )
448 {
449 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
451 n_ahead,
452 tau11,
453 a21, rs_A,
454 a12t, cs_A,
455 A22, rs_A, cs_A );
456
457 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
459 a12t_l,
460 a12t_r, cs_A,
461 sigma11 );
462
463 // FLA_Set( FLA_ONE, v21_t );
464 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
465 *v21_t = *buff_1;
467 n_ahead - 1,
468 a12t_r, cs_A,
469 v21_b, inc_v );
470
471 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
473 n_ahead - 1,
474 sigma11,
475 v21_b, inc_v,
476 A22_l, rs_A,
477 A22_r, rs_A, cs_A );
478
479 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
482 m_behind,
483 n_ahead,
484 buff_1,
485 A02, rs_A, cs_A,
486 v21, inc_v,
487 buff_0,
488 s01, rs_S );
489 }
490
491 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
492 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
493 bl1_ccopyv( BLIS1_CONJUGATE,
494 n_behind,
495 a10t, cs_A,
496 t01, rs_T );
497 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
498 BLIS1_NO_CONJUGATE,
499 m_ahead,
500 n_behind,
501 buff_1,
502 A20, rs_A, cs_A,
503 a21, rs_A,
504 buff_1,
505 t01, rs_T );
506
507 /*------------------------------------------------------------*/
508
509 }
510
511 // FLA_Obj_free( &v );
512 FLA_free( buff_v );
513
514 return FLA_SUCCESS;
515}
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Apply_H2_UT_r_opc_var1(int n_u2h_A2, int m_a1, scomplex *tau, scomplex *u2h, int inc_u2h, scomplex *a1, int inc_a1, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:254

References bl1_ccopyv(), bl1_cgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_opc_var2()

FLA_Error FLA_Bidiag_UT_u_step_opc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
536{
540
542 int i;
543
544 // b_alg = FLA_Obj_length( T );
545 int b_alg = m_TS;
546
547 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
548 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
549 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
550 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
551 scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
552 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
553 int inc_v = 1;
554 int inc_y = 1;
555 int inc_z = 1;
556
557 for ( i = 0; i < b_alg; ++i )
558 {
559 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
560 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
561 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
562 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
563 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
564 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
565 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
566
567 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
568 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
569
570 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
571 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
572
573 scomplex* v21 = buff_v + (i+1)*inc_v;
574
575 scomplex* y21 = buff_y + (i+1)*inc_y;
576
577 scomplex* z21 = buff_z + (i+1)*inc_z;
578
579 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
580 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
581
582 scomplex* v21_t = v21 + (0 )*inc_v;
583 scomplex* v21_b = v21 + (1 )*inc_v;
584
585 int m_ahead = m_A - i - 1;
586 int n_ahead = n_A - i - 1;
587 int m_behind = i;
588 int n_behind = i;
589
590 /*------------------------------------------------------------*/
591
592 // FLA_Househ2_UT( FLA_LEFT,
593 // alpha11,
594 // a21, tau11 );
596 alpha11,
597 a21, rs_A,
598 tau11 );
599
600 if ( n_ahead > 0 )
601 {
602 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
603 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
605 n_ahead,
606 a12t, cs_A,
607 y21, inc_y );
610 m_ahead,
611 n_ahead,
612 buff_1,
613 A22, rs_A, cs_A,
614 a21, rs_A,
615 buff_1,
616 y21, inc_y );
617
618 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
620 n_ahead,
621 tau11,
622 y21, inc_y );
623
624 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
626 n_ahead,
627 buff_m1,
628 y21, inc_y,
629 a12t, cs_A );
630
631 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
633 a12t_l,
634 a12t_r, cs_A,
635 sigma11 );
636
637 // FLA_Set( FLA_ONE, v21_t );
638 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
639 *v21_t = *buff_1;
641 n_ahead - 1,
642 a12t_r, cs_A,
643 v21_b, inc_y );
644
645 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
646 // FLA_Scal( FLA_MINUS_ONE, beta );
648 n_ahead,
649 y21, inc_y,
650 v21, inc_v,
651 &beta );
652 bl1_cneg1( &beta );
653
654 // FLA_Copy( a21, z21 );
655 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
656 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
658 m_ahead,
659 a21, rs_A,
660 z21, inc_z );
663 m_ahead,
664 n_ahead,
665 buff_1,
666 A22, rs_A, cs_A,
667 v21, inc_v,
668 &beta,
669 z21, inc_z );
671 m_ahead,
672 sigma11,
673 z21, inc_z );
674
675 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
676 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
679 m_ahead,
680 n_ahead,
681 buff_m1,
682 a21, rs_A,
683 y21, inc_y,
684 A22, rs_A, cs_A );
687 m_ahead,
688 n_ahead,
689 buff_m1,
690 z21, inc_z,
691 v21, inc_v,
692 A22, rs_A, cs_A );
693
694 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
697 m_behind,
698 n_ahead,
699 buff_1,
700 A02, rs_A, cs_A,
701 v21, inc_v,
702 buff_0,
703 s01, rs_S );
704 }
705
706 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
707 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
708 bl1_ccopyv( BLIS1_CONJUGATE,
709 n_behind,
710 a10t, cs_A,
711 t01, rs_T );
712 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
713 BLIS1_NO_CONJUGATE,
714 m_ahead,
715 n_behind,
716 buff_1,
717 A20, rs_A, cs_A,
718 a21, rs_A,
719 buff_1,
720 t01, rs_T );
721
722 /*------------------------------------------------------------*/
723
724 }
725
726 // FLA_Obj_free( &v );
727 // FLA_Obj_free( &y );
728 // FLA_Obj_free( &z );
729 FLA_free( buff_v );
730 FLA_free( buff_y );
731 FLA_free( buff_z );
732
733 return FLA_SUCCESS;
734}

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_opc_var3()

FLA_Error FLA_Bidiag_UT_u_step_opc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
956{
960
970 int i;
971
972 // b_alg = FLA_Obj_length( T );
973 int b_alg = m_TS;
974
975 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
976 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
977 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
978 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
979 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
980 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
981 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
982 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
983 scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
984 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
985 scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
986 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
987 scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
988 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
989 int inc_w = 1;
990 int inc_ap = 1;
991 int inc_u = 1;
992 int inc_up = 1;
993 int inc_v = 1;
994 int inc_y = 1;
995 int inc_z = 1;
996
997 for ( i = 0; i < b_alg; ++i )
998 {
999 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1000 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1001 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1002 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1003 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1004 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1005 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1006
1007 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1008 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1009
1010 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1011 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1012
1013 scomplex* w21 = buff_w + (i+1)*inc_w;
1014
1015 scomplex* a12p = buff_ap + (i+1)*inc_ap;
1016
1018 scomplex* u21 = buff_u + (i+1)*inc_u;
1019
1020 scomplex* u21p = buff_up + (i+1)*inc_up;
1021
1022 scomplex* nu11 = buff_v + (i )*inc_v;
1023 scomplex* v21 = buff_v + (i+1)*inc_v;
1024
1025 scomplex* psi11 = buff_y + (i )*inc_y;
1026 scomplex* y21 = buff_y + (i+1)*inc_y;
1027
1028 scomplex* zeta11 = buff_z + (i )*inc_z;
1029 scomplex* z21 = buff_z + (i+1)*inc_z;
1030
1031 scomplex* a12p_t = a12p + (0 )*inc_ap;
1032 scomplex* a12p_b = a12p + (1 )*inc_ap;
1033
1034 scomplex* v21_t = v21 + (0 )*inc_v;
1035 scomplex* v21_b = v21 + (1 )*inc_v;
1036
1037 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1038 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1039
1040 scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1041
1042 int m_ahead = m_A - i - 1;
1043 int n_ahead = n_A - i - 1;
1044 int m_behind = i;
1045 int n_behind = i;
1046
1047 /*------------------------------------------------------------*/
1048
1049 if ( m_behind > 0 )
1050 {
1051 // FLA_Copy( upsilon11, minus_upsilon11 );
1052 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1054
1055 // FLA_Copy( zeta11, minus_zeta11 );
1056 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1058
1059 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1060 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1063
1064 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1065 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1068
1069 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1070 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1072 1,
1074 upsilon11, 1,
1075 alpha11, 1 );
1077 1,
1079 zeta11, 1,
1080 alpha11, 1 );
1081
1082 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1083 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1085 m_ahead,
1087 u21, inc_u,
1088 a21, rs_A );
1090 m_ahead,
1092 z21, inc_z,
1093 a21, rs_A );
1094
1095 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1096 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1098 n_ahead,
1100 y21, inc_y,
1101 a12t, cs_A );
1103 n_ahead,
1104 &minus_zeta11,
1105 v21, inc_v,
1106 a12t, cs_A );
1107 }
1108
1109 // FLA_Househ2_UT( FLA_LEFT,
1110 // alpha11,
1111 // a21, tau11 );
1112 // FLA_Copy( a21, u21p );
1114 alpha11,
1115 a21, rs_A,
1116 tau11 );
1118 m_ahead,
1119 a21, rs_A,
1120 u21p, inc_up );
1121
1122 if ( n_ahead > 0 )
1123 {
1124 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1125 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1127
1128 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1129 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1131 n_ahead,
1132 a12t, cs_A,
1133 a12p, inc_ap );
1135 n_ahead,
1137 a12t, cs_A,
1138 a12p, inc_ap );
1139 }
1140
1141 if ( m_behind > 0 )
1142 {
1143 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1144 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1147 m_ahead,
1148 n_ahead,
1149 buff_m1,
1150 u21, inc_u,
1151 y21, inc_y,
1152 A22, rs_A, cs_A );
1155 m_ahead,
1156 n_ahead,
1157 buff_m1,
1158 z21, inc_z,
1159 v21, inc_v,
1160 A22, rs_A, cs_A );
1161 }
1162
1163 if ( n_ahead > 0 )
1164 {
1165 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1168 m_ahead,
1169 n_ahead,
1170 buff_1,
1171 A22, rs_A, cs_A,
1172 u21p, inc_up,
1173 buff_0,
1174 y21, inc_y );
1175
1176 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1178 n_ahead,
1180 y21, inc_y,
1181 a12p, inc_ap );
1182
1183 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1186 m_ahead,
1187 n_ahead,
1188 buff_1,
1189 A22, rs_A, cs_A,
1190 a12p, inc_ap,
1191 buff_0,
1192 w21, inc_w );
1193
1194 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1196 n_ahead,
1197 buff_1,
1198 a12t, cs_A,
1199 y21, inc_y );
1200
1201 // FLA_Househ2s_UT( FLA_RIGHT,
1202 // a12p_t,
1203 // a12p_b,
1204 // alpha12, psi11_minus_alpha12, sigma11 );
1206 a12p_t,
1207 a12p_b, inc_ap,
1208 &alpha12,
1210 sigma11 );
1211
1212 // FLA_Copy( a12p, v21 );
1213 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1214 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1215 // FLA_Conjugate( v21_b );
1217 n_ahead,
1218 a12p, inc_ap,
1219 v21, inc_v );
1222 n_ahead,
1224 v21, inc_v );
1225 bl1_cconjv( n_ahead - 1,
1226 v21_b, inc_v );
1227
1228 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1229 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1230 *a12t_l = alpha12;
1232 n_ahead - 1,
1233 v21_b, inc_v,
1234 a12t_r, cs_A );
1235 }
1236
1237 // FLA_Copy( u21p, u21 );
1239 m_ahead,
1240 u21p, inc_up,
1241 u21, inc_u );
1242
1243 if ( n_ahead > 0 )
1244 {
1245 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1246 // FLA_Scal( FLA_MINUS_ONE, beta );
1247 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1249 n_ahead,
1250 y21, inc_y,
1251 v21, inc_v,
1252 &beta );
1254
1255 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1256 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1259
1260 // FLA_Copy( w21, z21 );
1261 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1262 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1263 // FLA_Axpy( beta, u21, z21 );
1265 m_ahead,
1266 w21, inc_w,
1267 z21, inc_z );
1269 m_ahead,
1271 A22_l, rs_A,
1272 z21, inc_z );
1274 m_ahead,
1276 z21, inc_z );
1278 m_ahead,
1279 &beta,
1280 u21, inc_u,
1281 z21, inc_z );
1282
1283 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1284 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1286 n_ahead,
1287 tau11,
1288 y21, inc_y );
1290 m_ahead,
1291 sigma11,
1292 z21, inc_z );
1293
1294 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1297 m_behind,
1298 n_ahead,
1299 buff_1,
1300 A02, rs_A, cs_A,
1301 v21, inc_v,
1302 buff_0,
1303 s01, rs_S );
1304 }
1305
1306 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1307 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1308 bl1_ccopyv( BLIS1_CONJUGATE,
1309 n_behind,
1310 a10t, cs_A,
1311 t01, rs_T );
1312 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1313 BLIS1_NO_CONJUGATE,
1314 m_ahead,
1315 n_behind,
1316 buff_1,
1317 A20, rs_A, cs_A,
1318 u21, inc_u,
1319 buff_1,
1320 t01, rs_T );
1321
1322 if ( m_behind + 1 == b_alg && n_ahead > 0 )
1323 {
1324 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1325 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1328 m_ahead,
1329 n_ahead,
1330 buff_m1,
1331 u21, inc_u,
1332 y21, inc_y,
1333 A22, rs_A, cs_A );
1336 m_ahead,
1337 n_ahead,
1338 buff_m1,
1339 z21, inc_z,
1340 v21, inc_v,
1341 A22, rs_A, cs_A );
1342 }
1343
1344 /*------------------------------------------------------------*/
1345
1346 }
1347
1348 // FLA_Obj_free( &w );
1349 // FLA_Obj_free( &ap );
1350 // FLA_Obj_free( &u );
1351 // FLA_Obj_free( &up );
1352 // FLA_Obj_free( &v );
1353 // FLA_Obj_free( &y );
1354 // FLA_Obj_free( &z );
1355 FLA_free( buff_w );
1356 FLA_free( buff_ap );
1357 FLA_free( buff_u );
1358 FLA_free( buff_up );
1359 FLA_free( buff_v );
1360 FLA_free( buff_y );
1361 FLA_free( buff_z );
1362
1363 return FLA_SUCCESS;
1364}

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

◆ FLA_Bidiag_UT_u_step_opc_var4()

FLA_Error FLA_Bidiag_UT_u_step_opc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
1259{
1263
1268 scomplex beta;
1270 int i;
1271
1272 // b_alg = FLA_Obj_length( T );
1273 int b_alg = m_TS;
1274
1275 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1276 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1277 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1278 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1279 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1280 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1281 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1282 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1283 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1284 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1285 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1286 scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1287 scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1288 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1289 scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1290 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1291 scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1292 scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1293 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1294 scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1295 int inc_w = 1;
1296 int inc_al = 1;
1297 int inc_ap = 1;
1298 int inc_u = 1;
1299 int inc_up = 1;
1300 int inc_v = 1;
1301 int inc_d = 1;
1302 int inc_e = 1;
1303 int inc_f = 1;
1304 int inc_g = 1;
1305
1306 // FLA_Set( FLA_ZERO, Y );
1307 // FLA_Set( FLA_ZERO, Z );
1308 bl1_csetm( n_A,
1309 b_alg,
1310 buff_0,
1311 buff_Y, rs_Y, cs_Y );
1312 bl1_csetm( m_A,
1313 b_alg,
1314 buff_0,
1315 buff_Z, rs_Z, cs_Z );
1316
1317 for ( i = 0; i < b_alg; ++i )
1318 {
1319 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1320 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1321 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1322 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1323 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1324 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1325 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1326 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1327
1328 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1329 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1330 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1331
1332 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1333 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1334 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1335
1336 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1337 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1338
1339 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1340 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1341
1342 scomplex* w21 = buff_w + (i+1)*inc_w;
1343
1344 scomplex* a22l = buff_al + (i+1)*inc_al;
1345
1346 scomplex* a12p = buff_ap + (i+1)*inc_ap;
1347
1348 scomplex* u21 = buff_u + (i+1)*inc_u;
1349
1350 scomplex* u21p = buff_up + (i+1)*inc_up;
1351
1352 scomplex* v21 = buff_v + (i+1)*inc_v;
1353
1354 scomplex* d0 = buff_d + (0 )*inc_d;
1355
1356 scomplex* e0 = buff_e + (0 )*inc_e;
1357
1358 scomplex* f0 = buff_f + (0 )*inc_f;
1359
1360 scomplex* g0 = buff_g + (0 )*inc_g;
1361
1362 scomplex* a12p_t = a12p + (0 )*inc_ap;
1363 scomplex* a12p_b = a12p + (1 )*inc_ap;
1364
1365 scomplex* v21_t = v21 + (0 )*inc_v;
1366 scomplex* v21_b = v21 + (1 )*inc_v;
1367
1368 scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1369
1370 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1371 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1372
1373 scomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1374
1375 scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1376
1377 scomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1378
1379 scomplex* ABL = a10t;
1380 scomplex* ZBL = z10t;
1381
1382 scomplex* a2 = alpha11;
1383
1384 int m_ahead = m_A - i - 1;
1385 int n_ahead = n_A - i - 1;
1386 int m_behind = i;
1387 int n_behind = i;
1388
1389 /*------------------------------------------------------------*/
1390
1391 if ( m_behind > 0 )
1392 {
1393 // FLA_Copy( a01_b, last_elem );
1394 // FLA_Set( FLA_ONE, a01_b );
1395 last_elem = *a01_b;
1396 *a01_b = *buff_1;
1397 }
1398
1399 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1400 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1403 m_ahead + 1,
1404 n_behind,
1405 buff_m1,
1406 ABL, rs_A, cs_A,
1407 y10t, cs_Y,
1408 buff_1,
1409 a2, rs_A );
1412 m_ahead + 1,
1413 n_behind,
1414 buff_m1,
1415 ZBL, rs_Z, cs_Z,
1416 a01, rs_A,
1417 buff_1,
1418 a2, rs_A );
1419
1420 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1421 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1424 n_ahead,
1425 n_behind,
1426 buff_m1,
1427 Y20, rs_Y, cs_Y,
1428 a10t, cs_A,
1429 buff_1,
1430 a12t, cs_A );
1433 m_behind,
1434 n_ahead,
1435 buff_m1,
1436 A02, rs_A, cs_A,
1437 z10t, cs_Z,
1438 buff_1,
1439 a12t, cs_A );
1440
1441 if ( m_behind > 0 )
1442 {
1443 // FLA_Copy( last_elem, a01_b );
1444 *a01_b = last_elem;
1445 }
1446
1447 // FLA_Househ2_UT( FLA_LEFT,
1448 // alpha11,
1449 // a21, tau11 );
1450 // FLA_Copy( a21, u21p );
1452 alpha11,
1453 a21, rs_A,
1454 tau11 );
1456 m_ahead,
1457 a21, rs_A,
1458 u21p, inc_up );
1459
1460 if ( n_ahead > 0 )
1461 {
1462 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1463 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1465
1466 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1467 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1469 n_ahead,
1470 a12t, cs_A,
1471 a12p, inc_ap );
1473 n_ahead,
1475 a12t, cs_A,
1476 a12p, inc_ap );
1477
1478 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1479 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1482 m_ahead,
1483 n_behind,
1484 buff_1,
1485 A20, rs_A, cs_A,
1486 u21p, inc_up,
1487 buff_0,
1488 d0, inc_d );
1491 m_ahead,
1492 n_behind,
1493 buff_1,
1494 Z20, rs_Z, cs_Z,
1495 u21p, inc_up,
1496 buff_0,
1497 e0, inc_e );
1498
1499 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1500 // FLA_Axpy( FLA_ONE, d0, t01 );
1502 n_behind,
1503 a10t, cs_A,
1504 t01, rs_T );
1506 n_behind,
1507 buff_1,
1508 d0, inc_d,
1509 t01, rs_T );
1510
1511 // FLA_Set( FLA_ZERO, y21 );
1512 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1513 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1515 buff_0,
1516 y21, rs_Y );
1519 n_ahead,
1520 n_behind,
1521 buff_m1,
1522 Y20, rs_Y, cs_Y,
1523 d0, inc_d,
1524 buff_1,
1525 y21, rs_Y );
1528 m_behind,
1529 n_ahead,
1530 buff_m1,
1531 A02, rs_A, cs_A,
1532 e0, inc_e,
1533 buff_1,
1534 y21, rs_Y );
1535
1536 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1539 m_ahead,
1540 n_ahead,
1541 buff_1,
1542 A22, rs_A, cs_A,
1543 u21p, inc_up,
1544 buff_1,
1545 y21, rs_Y );
1546
1547 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1549 n_ahead,
1551 y21, rs_Y,
1552 a12p, inc_ap );
1553
1554 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1557 m_ahead,
1558 n_ahead,
1559 buff_1,
1560 A22, rs_A, cs_A,
1561 a12p, inc_ap,
1562 buff_0,
1563 w21, inc_w );
1564
1565 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1566 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1569 n_ahead,
1570 n_behind,
1571 buff_1,
1572 Y20, rs_Y, cs_Y,
1573 a12p, inc_ap,
1574 buff_0,
1575 f0, inc_f );
1578 m_behind,
1579 n_ahead,
1580 buff_1,
1581 A02, rs_A, cs_A,
1582 a12p, inc_ap,
1583 buff_0,
1584 g0, inc_g );
1585
1586 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1587 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1590 m_ahead,
1591 n_behind,
1592 buff_m1,
1593 A20, rs_A, cs_A,
1594 f0, inc_f,
1595 buff_1,
1596 w21, inc_w );
1599 m_ahead,
1600 n_behind,
1601 buff_m1,
1602 Z20, rs_Z, cs_Z,
1603 g0, inc_g,
1604 buff_1,
1605 w21, inc_w );
1606
1607 // FLA_Copy( A22_l, a22l );
1608 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1609 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1611 m_ahead,
1612 A22_l, rs_A,
1613 a22l, inc_al );
1616 m_ahead,
1617 n_behind,
1618 buff_m1,
1619 A20, rs_A, cs_A,
1620 Y20_t, cs_Y,
1621 buff_1,
1622 a22l, inc_al );
1625 m_ahead,
1626 n_behind,
1627 buff_m1,
1628 Z20, rs_Z, cs_Z,
1629 A02_l, rs_A,
1630 buff_1,
1631 a22l, inc_al );
1632
1633 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1635 n_ahead,
1636 buff_1,
1637 a12t, cs_A,
1638 y21, rs_Y );
1639
1640 // FLA_Househ2s_UT( FLA_RIGHT,
1641 // a12p_t,
1642 // a12p_b,
1643 // alpha12, psi11_minus_alpha12, sigma11 );
1645 a12p_t,
1646 a12p_b, inc_ap,
1647 &alpha12,
1649 sigma11 );
1650
1651 // FLA_Copy( a12p, v21 );
1652 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1653 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1654 // FLA_Conjugate( v21_b );
1656 n_ahead,
1657 a12p, inc_ap,
1658 v21, inc_v );
1661 n_ahead,
1663 v21, inc_v );
1664 bl1_cconjv( n_ahead - 1,
1665 v21_b, inc_v );
1666
1667 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1668 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1671
1672 // FLA_Copy( g0, s01 );
1673 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1674 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1676 n_behind,
1677 g0, inc_g,
1678 s01, rs_S );
1680 n_behind,
1682 A02_l, rs_A,
1683 s01, rs_S );
1685 n_behind,
1687 s01, rs_S );
1688
1689 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1690 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1691 *a12t_l = alpha12;
1693 n_ahead - 1,
1694 v21_b, inc_v,
1695 a12t_r, cs_A );
1696 }
1697
1698 // FLA_Copy( u21p, u21 );
1700 m_ahead,
1701 u21p, inc_up,
1702 u21, inc_u );
1703
1704 if ( n_ahead > 0 )
1705 {
1706 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1707 // FLA_Scal( FLA_MINUS_ONE, beta );
1708 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1710 n_ahead,
1711 y21, rs_Y,
1712 v21, inc_v,
1713 &beta );
1715
1716 // FLA_Copy( w21, z21 );
1717 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1718 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1719 // FLA_Axpy( beta, u21, z21 );
1721 m_ahead,
1722 w21, inc_w,
1723 z21, rs_Z );
1725 m_ahead,
1727 a22l, inc_al,
1728 z21, rs_Z );
1730 m_ahead,
1732 z21, rs_Z );
1734 m_ahead,
1735 &beta,
1736 u21, inc_u,
1737 z21, rs_Z );
1738
1739 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1740 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1742 n_ahead,
1743 tau11,
1744 y21, rs_Y );
1746 m_ahead,
1747 sigma11,
1748 z21, rs_Z );
1749 }
1750 else // if ( n_ahead == 0 )
1751 {
1752 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1753 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1755 n_behind,
1756 a10t, cs_A,
1757 t01, rs_T );
1760 m_ahead,
1761 n_behind,
1762 buff_1,
1763 A20, rs_A, cs_A,
1764 u21, inc_u,
1765 buff_1,
1766 t01, rs_T );
1767 }
1768
1769 /*------------------------------------------------------------*/
1770
1771 }
1772
1773 // FLA_Obj_free( &w );
1774 // FLA_Obj_free( &al );
1775 // FLA_Obj_free( &ap );
1776 // FLA_Obj_free( &u );
1777 // FLA_Obj_free( &up );
1778 // FLA_Obj_free( &v );
1779 // FLA_Obj_free( &d );
1780 // FLA_Obj_free( &e );
1781 // FLA_Obj_free( &f );
1782 // FLA_Obj_free( &g );
1783 FLA_free( buff_w );
1784 FLA_free( buff_al );
1785 FLA_free( buff_ap );
1786 FLA_free( buff_u );
1787 FLA_free( buff_up );
1788 FLA_free( buff_v );
1789 FLA_free( buff_d );
1790 FLA_free( buff_e );
1791 FLA_free( buff_f );
1792 FLA_free( buff_g );
1793
1794 return FLA_SUCCESS;
1795}

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_opc_var5()

FLA_Error FLA_Bidiag_UT_u_step_opc_var5 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
953{
957
960 int i;
961
962 // b_alg = FLA_Obj_length( T );
963 int b_alg = m_TS;
964
965 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
966 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
967 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
968 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
969 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
970 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
971 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
972 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
973 scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
974 scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
975 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
976 scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
977 int inc_u = 1;
978 int inc_v = 1;
979 int inc_d = 1;
980 int inc_e = 1;
981 int inc_f = 1;
982 int inc_g = 1;
983
984 // FLA_Set( FLA_ZERO, Y );
985 // FLA_Set( FLA_ZERO, Z );
986 bl1_csetm( n_A,
987 b_alg,
988 buff_0,
989 buff_Y, rs_Y, cs_Y );
990 bl1_csetm( m_A,
991 b_alg,
992 buff_0,
993 buff_Z, rs_Z, cs_Z );
994
995 for ( i = 0; i < b_alg; ++i )
996 {
997 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
998 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
999 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1000 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1001 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1002 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1003 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1004 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1005
1006 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1007 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1008 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1009
1010 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1011 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1012 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1013
1014 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1015 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1016
1017 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1018 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1019
1020 scomplex* u21 = buff_u + (i+1)*inc_u;
1021
1022 scomplex* v21 = buff_v + (i+1)*inc_v;
1023
1024 scomplex* d0 = buff_d + (0 )*inc_d;
1025
1026 scomplex* e0 = buff_e + (0 )*inc_e;
1027
1028 scomplex* f0 = buff_f + (0 )*inc_f;
1029
1030 scomplex* g0 = buff_g + (0 )*inc_g;
1031
1032 scomplex* v21_t = v21 + (0 )*inc_v;
1033 scomplex* v21_b = v21 + (1 )*inc_v;
1034
1035 scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1036
1037 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1038 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1039
1040 scomplex* ABL = a10t;
1041 scomplex* ZBL = z10t;
1042
1043 scomplex* a2 = alpha11;
1044
1045 int m_ahead = m_A - i - 1;
1046 int n_ahead = n_A - i - 1;
1047 int m_behind = i;
1048 int n_behind = i;
1049
1050 /*------------------------------------------------------------*/
1051
1052 if ( m_behind > 0 )
1053 {
1054 // FLA_Copy( a01_b, last_elem );
1055 // FLA_Set( FLA_ONE, a01_b );
1056 last_elem = *a01_b;
1057 *a01_b = *buff_1;
1058 }
1059
1060 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1061 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1064 m_ahead + 1,
1065 n_behind,
1066 buff_m1,
1067 ABL, rs_A, cs_A,
1068 y10t, cs_Y,
1069 buff_1,
1070 a2, rs_A );
1073 m_ahead + 1,
1074 n_behind,
1075 buff_m1,
1076 ZBL, rs_Z, cs_Z,
1077 a01, rs_A,
1078 buff_1,
1079 a2, rs_A );
1080
1081 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1082 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1085 n_ahead,
1086 n_behind,
1087 buff_m1,
1088 Y20, rs_Y, cs_Y,
1089 a10t, cs_A,
1090 buff_1,
1091 a12t, cs_A );
1094 m_behind,
1095 n_ahead,
1096 buff_m1,
1097 A02, rs_A, cs_A,
1098 z10t, cs_Z,
1099 buff_1,
1100 a12t, cs_A );
1101
1102 if ( m_behind > 0 )
1103 {
1104 // FLA_Copy( last_elem, a01_b );
1105 *a01_b = last_elem;
1106 }
1107
1108 // FLA_Househ2_UT( FLA_LEFT,
1109 // alpha11,
1110 // a21, tau11 );
1111 // FLA_Copy( a21, u21 );
1113 alpha11,
1114 a21, rs_A,
1115 tau11 );
1117 m_ahead,
1118 a21, rs_A,
1119 u21, inc_u );
1120
1121 if ( n_ahead > 0 )
1122 {
1123 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1124 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1126 n_ahead,
1127 a12t, cs_A,
1128 y21, rs_Y );
1131 m_ahead,
1132 n_ahead,
1133 buff_1,
1134 A22, rs_A, cs_A,
1135 u21, inc_u,
1136 buff_1,
1137 y21, rs_Y );
1138
1139 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1140 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1143 m_ahead,
1144 n_behind,
1145 buff_1,
1146 A20, rs_A, cs_A,
1147 u21, inc_u,
1148 buff_0,
1149 d0, inc_d );
1152 m_ahead,
1153 n_behind,
1154 buff_1,
1155 Z20, rs_Z, cs_Z,
1156 u21, inc_u,
1157 buff_0,
1158 e0, inc_e );
1159
1160 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1161 // FLA_Axpy( FLA_ONE, d0, t01 );
1163 n_behind,
1164 a10t, cs_A,
1165 t01, rs_T );
1167 n_behind,
1168 buff_1,
1169 d0, inc_d,
1170 t01, rs_T );
1171
1172 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1173 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1176 n_ahead,
1177 n_behind,
1178 buff_m1,
1179 Y20, rs_Y, cs_Y,
1180 d0, inc_d,
1181 buff_1,
1182 y21, rs_Y );
1185 m_behind,
1186 n_ahead,
1187 buff_m1,
1188 A02, rs_A, cs_A,
1189 e0, inc_e,
1190 buff_1,
1191 y21, rs_Y );
1192
1193 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1195 n_ahead,
1196 tau11,
1197 y21, rs_Y );
1198
1199 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1201 n_ahead,
1202 buff_m1,
1203 y21, rs_Y,
1204 a12t, cs_A );
1205
1206 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
1208 a12t_l,
1209 a12t_r, cs_A,
1210 sigma11 );
1211
1212 // FLA_Set( FLA_ONE, v21_t );
1213 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1214 *v21_t = *buff_1;
1216 n_ahead - 1,
1217 a12t_r, cs_A,
1218 v21_b, inc_v );
1219
1220 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1221 // FLA_Scal( FLA_MINUS_ONE, beta );
1223 n_ahead,
1224 y21, rs_Y,
1225 v21, inc_v,
1226 &beta );
1227 bl1_cscals( buff_m1, &beta );
1228
1229 // FLA_Copy( u21, z21 );
1230 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1232 m_ahead,
1233 u21, inc_u,
1234 z21, rs_Z );
1237 m_ahead,
1238 n_ahead,
1239 buff_1,
1240 A22, rs_A, cs_A,
1241 v21, inc_v,
1242 &beta,
1243 z21, rs_Z );
1244
1245 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1246 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1249 n_ahead,
1250 m_behind,
1251 buff_1,
1252 Y20, rs_Y, cs_Y,
1253 v21, inc_v,
1254 buff_0,
1255 f0, inc_f );
1258 m_behind,
1259 n_ahead,
1260 buff_1,
1261 A02, rs_A, cs_A,
1262 v21, inc_v,
1263 buff_0,
1264 g0, inc_g );
1265
1266 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1267 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1270 m_ahead,
1271 n_behind,
1272 buff_m1,
1273 A20, rs_A, cs_A,
1274 f0, inc_f,
1275 buff_1,
1276 z21, rs_Z );
1279 m_ahead,
1280 n_behind,
1281 buff_m1,
1282 Z20, rs_Z, cs_Z,
1283 g0, inc_g,
1284 buff_1,
1285 z21, rs_Z );
1286
1287 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1289 m_ahead,
1290 sigma11,
1291 z21, rs_Z );
1292
1293 // FLA_Copy( g0, s01 );
1295 n_behind,
1296 g0, inc_g,
1297 s01, rs_S );
1298 }
1299 else // if ( n_ahead == 0 )
1300 {
1301 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1302 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1304 n_behind,
1305 a10t, cs_A,
1306 t01, rs_T );
1309 m_ahead,
1310 n_behind,
1311 buff_1,
1312 A20, rs_A, cs_A,
1313 u21, inc_u,
1314 buff_1,
1315 t01, rs_T );
1316 }
1317
1318 /*------------------------------------------------------------*/
1319
1320 }
1321
1322 // FLA_Obj_free( &u );
1323 // FLA_Obj_free( &v );
1324 // FLA_Obj_free( &d );
1325 // FLA_Obj_free( &e );
1326 // FLA_Obj_free( &f );
1327 // FLA_Obj_free( &g );
1328 FLA_free( buff_u );
1329 FLA_free( buff_v );
1330 FLA_free( buff_d );
1331 FLA_free( buff_e );
1332 FLA_free( buff_f );
1333 FLA_free( buff_g );
1334
1335 return FLA_SUCCESS;
1336}

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

◆ FLA_Bidiag_UT_u_step_opd_var1()

FLA_Error FLA_Bidiag_UT_u_step_opd_var1 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
255{
256 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
257 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
258
259 int i;
260
261 // b_alg = FLA_Obj_length( T );
262 int b_alg = m_TS;
263
264 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
265 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
266 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
267 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
268 int inc_v = 1;
269
270 for ( i = 0; i < b_alg; ++i )
271 {
272 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
273 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
274 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
275 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
276 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
277 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
278 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
279
280 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
281 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
282
283 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
284 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
285
286 double* v21 = buff_v + (i+1)*inc_v;
287
288 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
289 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
290
291 double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
292 double* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
293
294 double* v21_t = v21 + (0 )*inc_v;
295 double* v21_b = v21 + (1 )*inc_v;
296
297 int m_ahead = m_A - i - 1;
298 int n_ahead = n_A - i - 1;
299 int m_behind = i;
300 int n_behind = i;
301
302 /*------------------------------------------------------------*/
303
304 // FLA_Househ2_UT( FLA_LEFT,
305 // alpha11,
306 // a21, tau11 );
308 alpha11,
309 a21, rs_A,
310 tau11 );
311
312 if ( n_ahead > 0 )
313 {
314 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
316 n_ahead,
317 tau11,
318 a21, rs_A,
319 a12t, cs_A,
320 A22, rs_A, cs_A );
321
322 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
324 a12t_l,
325 a12t_r, cs_A,
326 sigma11 );
327
328 // FLA_Set( FLA_ONE, v21_t );
329 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
330 *v21_t = *buff_1;
332 n_ahead - 1,
333 a12t_r, cs_A,
334 v21_b, inc_v );
335
336 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
338 n_ahead - 1,
339 sigma11,
340 v21_b, inc_v,
341 A22_l, rs_A,
342 A22_r, rs_A, cs_A );
343
344 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
347 m_behind,
348 n_ahead,
349 buff_1,
350 A02, rs_A, cs_A,
351 v21, inc_v,
352 buff_0,
353 s01, rs_S );
354 }
355
356 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
357 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
359 n_behind,
360 a10t, cs_A,
361 t01, rs_T );
364 m_ahead,
365 n_behind,
366 buff_1,
367 A20, rs_A, cs_A,
368 a21, rs_A,
369 buff_1,
370 t01, rs_T );
371
372 /*------------------------------------------------------------*/
373
374 }
375
376 // FLA_Obj_free( &v );
377 FLA_free( buff_v );
378
379 return FLA_SUCCESS;
380}
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:195
FLA_Error FLA_Apply_H2_UT_r_opd_var1(int n_u2h_A2, int m_a1, double *tau, double *u2h, int inc_u2h, double *a1, int inc_a1, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:181

References bl1_dcopyv(), bl1_dgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_opd_var2()

FLA_Error FLA_Bidiag_UT_u_step_opd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
328{
329 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
330 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
332
333 double beta;
334 int i;
335
336 // b_alg = FLA_Obj_length( T );
337 int b_alg = m_TS;
338
339 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
340 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
341 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
342 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
343 double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
344 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
345 int inc_v = 1;
346 int inc_y = 1;
347 int inc_z = 1;
348
349 for ( i = 0; i < b_alg; ++i )
350 {
351 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
352 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
353 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
354 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
355 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
356 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
357 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
358
359 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
360 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
361
362 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
363 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
364
365 double* v21 = buff_v + (i+1)*inc_v;
366
367 double* y21 = buff_y + (i+1)*inc_y;
368
369 double* z21 = buff_z + (i+1)*inc_z;
370
371 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
372 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
373
374 double* v21_t = v21 + (0 )*inc_v;
375 double* v21_b = v21 + (1 )*inc_v;
376
377 int m_ahead = m_A - i - 1;
378 int n_ahead = n_A - i - 1;
379 int m_behind = i;
380 int n_behind = i;
381
382 /*------------------------------------------------------------*/
383
384 // FLA_Househ2_UT( FLA_LEFT,
385 // alpha11,
386 // a21, tau11 );
388 alpha11,
389 a21, rs_A,
390 tau11 );
391
392 if ( n_ahead > 0 )
393 {
394 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
395 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
397 n_ahead,
398 a12t, cs_A,
399 y21, inc_y );
402 m_ahead,
403 n_ahead,
404 buff_1,
405 A22, rs_A, cs_A,
406 a21, rs_A,
407 buff_1,
408 y21, inc_y );
409
410 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
412 n_ahead,
413 tau11,
414 y21, inc_y );
415
416 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
418 n_ahead,
419 buff_m1,
420 y21, inc_y,
421 a12t, cs_A );
422
423 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
425 a12t_l,
426 a12t_r, cs_A,
427 sigma11 );
428
429 // FLA_Set( FLA_ONE, v21_t );
430 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
431 *v21_t = *buff_1;
433 n_ahead - 1,
434 a12t_r, cs_A,
435 v21_b, inc_v );
436
437 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
438 // FLA_Scal( FLA_MINUS_ONE, beta );
440 n_ahead,
441 y21, inc_y,
442 v21, inc_v,
443 &beta );
444 bl1_dneg1( &beta );
445
446 // FLA_Copy( a21, z21 );
447 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
448 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
450 m_ahead,
451 a21, rs_A,
452 z21, inc_z );
455 m_ahead,
456 n_ahead,
457 buff_1,
458 A22, rs_A, cs_A,
459 v21, inc_v,
460 &beta,
461 z21, inc_z );
463 m_ahead,
464 sigma11,
465 z21, inc_z );
466
467 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
468 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
471 m_ahead,
472 n_ahead,
473 buff_m1,
474 a21, rs_A,
475 y21, inc_y,
476 A22, rs_A, cs_A );
479 m_ahead,
480 n_ahead,
481 buff_m1,
482 z21, inc_z,
483 v21, inc_v,
484 A22, rs_A, cs_A );
485
486 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
489 m_behind,
490 n_ahead,
491 buff_1,
492 A02, rs_A, cs_A,
493 v21, inc_v,
494 buff_0,
495 s01, rs_S );
496 }
497
498 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
499 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
501 n_behind,
502 a10t, cs_A,
503 t01, rs_T );
506 m_ahead,
507 n_behind,
508 buff_1,
509 A20, rs_A, cs_A,
510 a21, rs_A,
511 buff_1,
512 t01, rs_T );
513
514 /*------------------------------------------------------------*/
515
516 }
517
518 // FLA_Obj_free( &v );
519 // FLA_Obj_free( &y );
520 // FLA_Obj_free( &z );
521 FLA_free( buff_v );
522 FLA_free( buff_y );
523 FLA_free( buff_z );
524
525 return FLA_SUCCESS;
526}

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_opd_var3()

FLA_Error FLA_Bidiag_UT_u_step_opd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
538{
539 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
540 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
541 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
542
543 double alpha12;
544 double minus_conj_alpha12;
545 double psi11_minus_alpha12;
546 double minus_inv_tau11;
547 double minus_upsilon11;
548 double minus_conj_nu11;
549 double minus_conj_psi11;
550 double minus_zeta11;
551 double beta;
552 int i;
553
554 // b_alg = FLA_Obj_length( T );
555 int b_alg = m_TS;
556
557 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
558 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
559 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
560 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
561 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
562 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
563 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
564 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
565 double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
566 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
567 double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
568 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
569 double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
570 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
571 int inc_w = 1;
572 int inc_ap = 1;
573 int inc_u = 1;
574 int inc_up = 1;
575 int inc_v = 1;
576 int inc_y = 1;
577 int inc_z = 1;
578
579 for ( i = 0; i < b_alg; ++i )
580 {
581 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
582 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
583 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
584 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
585 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
586 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
587 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
588
589 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
590 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
591
592 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
593 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
594
595 double* w21 = buff_w + (i+1)*inc_w;
596
597 double* a12p = buff_ap + (i+1)*inc_ap;
598
599 double* upsilon11 = buff_u + (i )*inc_u;
600 double* u21 = buff_u + (i+1)*inc_u;
601
602 double* u21p = buff_up + (i+1)*inc_up;
603
604 double* nu11 = buff_v + (i )*inc_v;
605 double* v21 = buff_v + (i+1)*inc_v;
606
607 double* psi11 = buff_y + (i )*inc_y;
608 double* y21 = buff_y + (i+1)*inc_y;
609
610 double* zeta11 = buff_z + (i )*inc_z;
611 double* z21 = buff_z + (i+1)*inc_z;
612
613 double* a12p_t = a12p + (0 )*inc_ap;
614 double* a12p_b = a12p + (1 )*inc_ap;
615
616 double* v21_t = v21 + (0 )*inc_v;
617 double* v21_b = v21 + (1 )*inc_v;
618
619 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
620 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
621
622 double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
623
624 int m_ahead = m_A - i - 1;
625 int n_ahead = n_A - i - 1;
626 int m_behind = i;
627 int n_behind = i;
628
629 /*------------------------------------------------------------*/
630
631 if ( m_behind > 0 )
632 {
633 // FLA_Copy( upsilon11, minus_upsilon11 );
634 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
636
637 // FLA_Copy( zeta11, minus_zeta11 );
638 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
640
641 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
642 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
645
646 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
647 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
650
651 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
652 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
654 1,
656 upsilon11, 1,
657 alpha11, 1 );
659 1,
661 zeta11, 1,
662 alpha11, 1 );
663
664 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
665 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
667 m_ahead,
669 u21, inc_u,
670 a21, rs_A );
672 m_ahead,
674 z21, inc_z,
675 a21, rs_A );
676
677 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
678 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
680 n_ahead,
682 y21, inc_y,
683 a12t, cs_A );
685 n_ahead,
687 v21, inc_v,
688 a12t, cs_A );
689 }
690
691 // FLA_Househ2_UT( FLA_LEFT,
692 // alpha11,
693 // a21, tau11 );
694 // FLA_Copy( a21, u21p );
696 alpha11,
697 a21, rs_A,
698 tau11 );
700 m_ahead,
701 a21, rs_A,
702 u21p, inc_up );
703
704 if ( n_ahead > 0 )
705 {
706 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
707 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
709
710 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
711 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
713 n_ahead,
714 a12t, cs_A,
715 a12p, inc_ap );
717 n_ahead,
719 a12t, cs_A,
720 a12p, inc_ap );
721 }
722
723 if ( m_behind > 0 )
724 {
725 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
726 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
727 bl1_dger( BLIS1_NO_CONJUGATE,
728 BLIS1_CONJUGATE,
729 m_ahead,
730 n_ahead,
731 buff_m1,
732 u21, inc_u,
733 y21, inc_y,
734 A22, rs_A, cs_A );
735 bl1_dger( BLIS1_NO_CONJUGATE,
736 BLIS1_CONJUGATE,
737 m_ahead,
738 n_ahead,
739 buff_m1,
740 z21, inc_z,
741 v21, inc_v,
742 A22, rs_A, cs_A );
743 }
744
745 if ( n_ahead > 0 )
746 {
747 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
750 m_ahead,
751 n_ahead,
752 buff_1,
753 A22, rs_A, cs_A,
754 u21p, inc_up,
755 buff_0,
756 y21, inc_y );
757
758 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
760 n_ahead,
762 y21, inc_y,
763 a12p, inc_ap );
764
765 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
768 m_ahead,
769 n_ahead,
770 buff_1,
771 A22, rs_A, cs_A,
772 a12p, inc_ap,
773 buff_0,
774 w21, inc_w );
775
776 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
778 n_ahead,
779 buff_1,
780 a12t, cs_A,
781 y21, inc_y );
782
783 // FLA_Househ2s_UT( FLA_RIGHT,
784 // a12p_t,
785 // a12p_b,
786 // alpha12, psi11_minus_alpha12, sigma11 );
788 a12p_t,
789 a12p_b, inc_ap,
790 &alpha12,
792 sigma11 );
793
794 // FLA_Copy( a12p, v21 );
795 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
796 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
797 // FLA_Conjugate( v21_b );
799 n_ahead,
800 a12p, inc_ap,
801 v21, inc_v );
804 n_ahead,
806 v21, inc_v );
807 bl1_dconjv( n_ahead - 1,
808 v21_b, inc_v );
809
810 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
811 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
812 *a12t_l = alpha12;
814 n_ahead - 1,
815 v21_b, inc_v,
816 a12t_r, cs_A );
817 }
818
819 // FLA_Copy( u21p, u21 );
821 m_ahead,
822 u21p, inc_up,
823 u21, inc_u );
824
825 if ( n_ahead > 0 )
826 {
827 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
828 // FLA_Scal( FLA_MINUS_ONE, beta );
829 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
831 n_ahead,
832 y21, inc_y,
833 v21, inc_v,
834 &beta );
836
837 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
838 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
841
842 // FLA_Copy( w21, z21 );
843 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
844 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
845 // FLA_Axpy( beta, u21, z21 );
847 m_ahead,
848 w21, inc_w,
849 z21, inc_z );
851 m_ahead,
853 A22_l, rs_A,
854 z21, inc_z );
856 m_ahead,
858 z21, inc_z );
860 m_ahead,
861 &beta,
862 u21, inc_u,
863 z21, inc_z );
864
865 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
866 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
868 n_ahead,
869 tau11,
870 y21, inc_y );
872 m_ahead,
873 sigma11,
874 z21, inc_z );
875
876 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
879 m_behind,
880 n_ahead,
881 buff_1,
882 A02, rs_A, cs_A,
883 v21, inc_v,
884 buff_0,
885 s01, rs_S );
886 }
887
888 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
889 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
890 bl1_dcopyv( BLIS1_CONJUGATE,
891 n_behind,
892 a10t, cs_A,
893 t01, rs_T );
894 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
895 BLIS1_NO_CONJUGATE,
896 m_ahead,
897 n_behind,
898 buff_1,
899 A20, rs_A, cs_A,
900 u21, inc_u,
901 buff_1,
902 t01, rs_T );
903
904 if ( m_behind + 1 == b_alg && n_ahead > 0 )
905 {
906 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
907 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
908 bl1_dger( BLIS1_NO_CONJUGATE,
909 BLIS1_CONJUGATE,
910 m_ahead,
911 n_ahead,
912 buff_m1,
913 u21, inc_u,
914 y21, inc_y,
915 A22, rs_A, cs_A );
916 bl1_dger( BLIS1_NO_CONJUGATE,
917 BLIS1_CONJUGATE,
918 m_ahead,
919 n_ahead,
920 buff_m1,
921 z21, inc_z,
922 v21, inc_v,
923 A22, rs_A, cs_A );
924 }
925
926 /*------------------------------------------------------------*/
927
928 }
929
930 // FLA_Obj_free( &w );
931 // FLA_Obj_free( &ap );
932 // FLA_Obj_free( &u );
933 // FLA_Obj_free( &up );
934 // FLA_Obj_free( &v );
935 // FLA_Obj_free( &y );
936 // FLA_Obj_free( &z );
937 FLA_free( buff_w );
938 FLA_free( buff_ap );
939 FLA_free( buff_u );
940 FLA_free( buff_up );
941 FLA_free( buff_v );
942 FLA_free( buff_y );
943 FLA_free( buff_z );
944
945 return FLA_SUCCESS;
946}

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

◆ FLA_Bidiag_UT_u_step_opd_var4()

FLA_Error FLA_Bidiag_UT_u_step_opd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double *buff_A,
int  rs_A,
int  cs_A,
double *buff_Y,
int  rs_Y,
int  cs_Y,
double *buff_Z,
int  rs_Z,
int  cs_Z,
double *buff_T,
int  rs_T,
int  cs_T,
double *buff_S,
int  rs_S,
int  cs_S 
)
711{
712 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
713 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
714 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
715
716 double alpha12;
717 double minus_conj_alpha12;
718 double psi11_minus_alpha12;
719 double minus_inv_tau11;
720 double beta;
721 double last_elem;
722 int i;
723
724 // b_alg = FLA_Obj_length( T );
725 int b_alg = m_TS;
726
727 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
728 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
729 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
730 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
731 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
732 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
733 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
734 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
735 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
736 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
737 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
738 double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
739 double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
740 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
741 double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
742 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
743 double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
744 double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
745 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
746 double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
747 int inc_w = 1;
748 int inc_al = 1;
749 int inc_ap = 1;
750 int inc_u = 1;
751 int inc_up = 1;
752 int inc_v = 1;
753 int inc_d = 1;
754 int inc_e = 1;
755 int inc_f = 1;
756 int inc_g = 1;
757
758 // FLA_Set( FLA_ZERO, Y );
759 // FLA_Set( FLA_ZERO, Z );
760 bl1_dsetm( n_A,
761 b_alg,
762 buff_0,
763 buff_Y, rs_Y, cs_Y );
764 bl1_dsetm( m_A,
765 b_alg,
766 buff_0,
767 buff_Z, rs_Z, cs_Z );
768
769 for ( i = 0; i < b_alg; ++i )
770 {
771 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
772 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
773 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
774 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
775 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
776 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
777 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
778 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
779
780 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
781 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
782 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
783
784 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
785 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
786 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
787
788 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
789 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
790
791 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
792 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
793
794 double* w21 = buff_w + (i+1)*inc_w;
795
796 double* a22l = buff_al + (i+1)*inc_al;
797
798 double* a12p = buff_ap + (i+1)*inc_ap;
799
800 double* u21 = buff_u + (i+1)*inc_u;
801
802 double* u21p = buff_up + (i+1)*inc_up;
803
804 double* v21 = buff_v + (i+1)*inc_v;
805
806 double* d0 = buff_d + (0 )*inc_d;
807
808 double* e0 = buff_e + (0 )*inc_e;
809
810 double* f0 = buff_f + (0 )*inc_f;
811
812 double* g0 = buff_g + (0 )*inc_g;
813
814 double* a12p_t = a12p + (0 )*inc_ap;
815 double* a12p_b = a12p + (1 )*inc_ap;
816
817 double* v21_t = v21 + (0 )*inc_v;
818 double* v21_b = v21 + (1 )*inc_v;
819
820 double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
821
822 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
823 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
824
825 double* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
826
827 double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
828
829 double* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
830
831 double* ABL = a10t;
832 double* ZBL = z10t;
833
834 double* a2 = alpha11;
835
836 int m_ahead = m_A - i - 1;
837 int n_ahead = n_A - i - 1;
838 int m_behind = i;
839 int n_behind = i;
840
841 /*------------------------------------------------------------*/
842
843 if ( m_behind > 0 )
844 {
845 // FLA_Copy( a01_b, last_elem );
846 // FLA_Set( FLA_ONE, a01_b );
847 last_elem = *a01_b;
848 *a01_b = *buff_1;
849 }
850
851 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
852 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
855 m_ahead + 1,
856 n_behind,
857 buff_m1,
858 ABL, rs_A, cs_A,
859 y10t, cs_Y,
860 buff_1,
861 a2, rs_A );
864 m_ahead + 1,
865 n_behind,
866 buff_m1,
867 ZBL, rs_Z, cs_Z,
868 a01, rs_A,
869 buff_1,
870 a2, rs_A );
871
872 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
873 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
876 n_ahead,
877 n_behind,
878 buff_m1,
879 Y20, rs_Y, cs_Y,
880 a10t, cs_A,
881 buff_1,
882 a12t, cs_A );
885 m_behind,
886 n_ahead,
887 buff_m1,
888 A02, rs_A, cs_A,
889 z10t, cs_Z,
890 buff_1,
891 a12t, cs_A );
892
893 if ( m_behind > 0 )
894 {
895 // FLA_Copy( last_elem, a01_b );
896 *a01_b = last_elem;
897 }
898
899 // FLA_Househ2_UT( FLA_LEFT,
900 // alpha11,
901 // a21, tau11 );
902 // FLA_Copy( a21, u21p );
904 alpha11,
905 a21, rs_A,
906 tau11 );
908 m_ahead,
909 a21, rs_A,
910 u21p, inc_up );
911
912 if ( n_ahead > 0 )
913 {
914 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
915 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
917
918 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
919 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
921 n_ahead,
922 a12t, cs_A,
923 a12p, inc_ap );
925 n_ahead,
927 a12t, cs_A,
928 a12p, inc_ap );
929
930 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
931 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
934 m_ahead,
935 n_behind,
936 buff_1,
937 A20, rs_A, cs_A,
938 u21p, inc_up,
939 buff_0,
940 d0, inc_d );
943 m_ahead,
944 n_behind,
945 buff_1,
946 Z20, rs_Z, cs_Z,
947 u21p, inc_up,
948 buff_0,
949 e0, inc_e );
950
951 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
952 // FLA_Axpy( FLA_ONE, d0, t01 );
954 n_behind,
955 a10t, cs_A,
956 t01, rs_T );
958 n_behind,
959 buff_1,
960 d0, inc_d,
961 t01, rs_T );
962
963 // FLA_Set( FLA_ZERO, y21 );
964 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
965 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
967 buff_0,
968 y21, rs_Y );
971 n_ahead,
972 n_behind,
973 buff_m1,
974 Y20, rs_Y, cs_Y,
975 d0, inc_d,
976 buff_1,
977 y21, rs_Y );
980 m_behind,
981 n_ahead,
982 buff_m1,
983 A02, rs_A, cs_A,
984 e0, inc_e,
985 buff_1,
986 y21, rs_Y );
987
988 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
991 m_ahead,
992 n_ahead,
993 buff_1,
994 A22, rs_A, cs_A,
995 u21p, inc_up,
996 buff_1,
997 y21, rs_Y );
998
999 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1001 n_ahead,
1003 y21, rs_Y,
1004 a12p, inc_ap );
1005
1006 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1009 m_ahead,
1010 n_ahead,
1011 buff_1,
1012 A22, rs_A, cs_A,
1013 a12p, inc_ap,
1014 buff_0,
1015 w21, inc_w );
1016
1017 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1018 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1021 n_ahead,
1022 n_behind,
1023 buff_1,
1024 Y20, rs_Y, cs_Y,
1025 a12p, inc_ap,
1026 buff_0,
1027 f0, inc_f );
1030 m_behind,
1031 n_ahead,
1032 buff_1,
1033 A02, rs_A, cs_A,
1034 a12p, inc_ap,
1035 buff_0,
1036 g0, inc_g );
1037
1038 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1039 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1042 m_ahead,
1043 n_behind,
1044 buff_m1,
1045 A20, rs_A, cs_A,
1046 f0, inc_f,
1047 buff_1,
1048 w21, inc_w );
1051 m_ahead,
1052 n_behind,
1053 buff_m1,
1054 Z20, rs_Z, cs_Z,
1055 g0, inc_g,
1056 buff_1,
1057 w21, inc_w );
1058
1059 // FLA_Copy( A22_l, a22l );
1060 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1061 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1063 m_ahead,
1064 A22_l, rs_A,
1065 a22l, inc_al );
1068 m_ahead,
1069 n_behind,
1070 buff_m1,
1071 A20, rs_A, cs_A,
1072 Y20_t, cs_Y,
1073 buff_1,
1074 a22l, inc_al );
1077 m_ahead,
1078 n_behind,
1079 buff_m1,
1080 Z20, rs_Z, cs_Z,
1081 A02_l, rs_A,
1082 buff_1,
1083 a22l, inc_al );
1084
1085 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1087 n_ahead,
1088 buff_1,
1089 a12t, cs_A,
1090 y21, rs_Y );
1091
1092 // FLA_Househ2s_UT( FLA_RIGHT,
1093 // a12p_t,
1094 // a12p_b,
1095 // alpha12, psi11_minus_alpha12, sigma11 );
1097 a12p_t,
1098 a12p_b, inc_ap,
1099 &alpha12,
1101 sigma11 );
1102
1103 // FLA_Copy( a12p, v21 );
1104 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1105 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1106 // FLA_Conjugate( v21_b );
1108 n_ahead,
1109 a12p, inc_ap,
1110 v21, inc_v );
1113 n_ahead,
1115 v21, inc_v );
1116 bl1_dconjv( n_ahead - 1,
1117 v21_b, inc_v );
1118
1119 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1120 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1123
1124 // FLA_Copy( g0, s01 );
1125 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1126 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1128 n_behind,
1129 g0, inc_g,
1130 s01, rs_S );
1132 n_behind,
1134 A02_l, rs_A,
1135 s01, rs_S );
1137 n_behind,
1139 s01, rs_S );
1140
1141 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1142 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1143 *a12t_l = alpha12;
1145 n_ahead - 1,
1146 v21_b, inc_v,
1147 a12t_r, cs_A );
1148 }
1149
1150 // FLA_Copy( u21p, u21 );
1152 m_ahead,
1153 u21p, inc_up,
1154 u21, inc_u );
1155
1156 if ( n_ahead > 0 )
1157 {
1158 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1159 // FLA_Scal( FLA_MINUS_ONE, beta );
1160 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1162 n_ahead,
1163 y21, rs_Y,
1164 v21, inc_v,
1165 &beta );
1167
1168 // FLA_Copy( w21, z21 );
1169 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1170 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1171 // FLA_Axpy( beta, u21, z21 );
1173 m_ahead,
1174 w21, inc_w,
1175 z21, rs_Z );
1177 m_ahead,
1179 a22l, inc_al,
1180 z21, rs_Z );
1182 m_ahead,
1184 z21, rs_Z );
1186 m_ahead,
1187 &beta,
1188 u21, inc_u,
1189 z21, rs_Z );
1190
1191 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1192 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1194 n_ahead,
1195 tau11,
1196 y21, rs_Y );
1198 m_ahead,
1199 sigma11,
1200 z21, rs_Z );
1201 }
1202 else // if ( n_ahead == 0 )
1203 {
1204 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1205 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1206 bl1_dcopyv( BLIS1_CONJUGATE,
1207 n_behind,
1208 a10t, cs_A,
1209 t01, rs_T );
1210 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
1211 BLIS1_NO_CONJUGATE,
1212 m_ahead,
1213 n_behind,
1214 buff_1,
1215 A20, rs_A, cs_A,
1216 u21, inc_u,
1217 buff_1,
1218 t01, rs_T );
1219 }
1220
1221 /*------------------------------------------------------------*/
1222
1223 }
1224
1225 // FLA_Obj_free( &w );
1226 // FLA_Obj_free( &al );
1227 // FLA_Obj_free( &ap );
1228 // FLA_Obj_free( &u );
1229 // FLA_Obj_free( &up );
1230 // FLA_Obj_free( &v );
1231 // FLA_Obj_free( &d );
1232 // FLA_Obj_free( &e );
1233 // FLA_Obj_free( &f );
1234 // FLA_Obj_free( &g );
1235 FLA_free( buff_w );
1236 FLA_free( buff_al );
1237 FLA_free( buff_ap );
1238 FLA_free( buff_u );
1239 FLA_free( buff_up );
1240 FLA_free( buff_v );
1241 FLA_free( buff_d );
1242 FLA_free( buff_e );
1243 FLA_free( buff_f );
1244 FLA_free( buff_g );
1245
1246 return FLA_SUCCESS;
1247}

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_opd_var5()

FLA_Error FLA_Bidiag_UT_u_step_opd_var5 ( int  m_A,
int  n_A,
int  m_TS,
double *buff_A,
int  rs_A,
int  cs_A,
double *buff_Y,
int  rs_Y,
int  cs_Y,
double *buff_Z,
int  rs_Z,
int  cs_Z,
double *buff_T,
int  rs_T,
int  cs_T,
double *buff_S,
int  rs_S,
int  cs_S 
)
558{
559 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
560 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
561 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
562
563 double beta;
564 double last_elem;
565 int i;
566
567 // b_alg = FLA_Obj_length( T );
568 int b_alg = m_TS;
569
570 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
571 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
572 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
573 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
574 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
575 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
576 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
577 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
578 double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
579 double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
580 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
581 double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
582 int inc_u = 1;
583 int inc_v = 1;
584 int inc_d = 1;
585 int inc_e = 1;
586 int inc_f = 1;
587 int inc_g = 1;
588
589 // FLA_Set( FLA_ZERO, Y );
590 // FLA_Set( FLA_ZERO, Z );
591 bl1_dsetm( n_A,
592 b_alg,
593 buff_0,
594 buff_Y, rs_Y, cs_Y );
595 bl1_dsetm( m_A,
596 b_alg,
597 buff_0,
598 buff_Z, rs_Z, cs_Z );
599
600 for ( i = 0; i < b_alg; ++i )
601 {
602 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
603 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
604 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
605 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
606 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
607 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
608 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
609 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
610
611 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
612 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
613 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
614
615 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
616 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
617 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
618
619 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
620 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
621
622 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
623 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
624
625 double* u21 = buff_u + (i+1)*inc_u;
626
627 double* v21 = buff_v + (i+1)*inc_v;
628
629 double* d0 = buff_d + (0 )*inc_d;
630
631 double* e0 = buff_e + (0 )*inc_e;
632
633 double* f0 = buff_f + (0 )*inc_f;
634
635 double* g0 = buff_g + (0 )*inc_g;
636
637 double* v21_t = v21 + (0 )*inc_v;
638 double* v21_b = v21 + (1 )*inc_v;
639
640 double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
641
642 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
643 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
644
645 double* ABL = a10t;
646 double* ZBL = z10t;
647
648 double* a2 = alpha11;
649
650 int m_ahead = m_A - i - 1;
651 int n_ahead = n_A - i - 1;
652 int m_behind = i;
653 int n_behind = i;
654
655 /*------------------------------------------------------------*/
656
657 if ( m_behind > 0 )
658 {
659 // FLA_Copy( a01_b, last_elem );
660 // FLA_Set( FLA_ONE, a01_b );
661 last_elem = *a01_b;
662 *a01_b = *buff_1;
663 }
664
665 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
666 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
669 m_ahead + 1,
670 n_behind,
671 buff_m1,
672 ABL, rs_A, cs_A,
673 y10t, cs_Y,
674 buff_1,
675 a2, rs_A );
678 m_ahead + 1,
679 n_behind,
680 buff_m1,
681 ZBL, rs_Z, cs_Z,
682 a01, rs_A,
683 buff_1,
684 a2, rs_A );
685
686 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
687 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
690 n_ahead,
691 n_behind,
692 buff_m1,
693 Y20, rs_Y, cs_Y,
694 a10t, cs_A,
695 buff_1,
696 a12t, cs_A );
699 m_behind,
700 n_ahead,
701 buff_m1,
702 A02, rs_A, cs_A,
703 z10t, cs_Z,
704 buff_1,
705 a12t, cs_A );
706
707 if ( m_behind > 0 )
708 {
709 // FLA_Copy( last_elem, a01_b );
710 *a01_b = last_elem;
711 }
712
713 // FLA_Househ2_UT( FLA_LEFT,
714 // alpha11,
715 // a21, tau11 );
716 // FLA_Copy( a21, u21 );
718 alpha11,
719 a21, rs_A,
720 tau11 );
722 m_ahead,
723 a21, rs_A,
724 u21, inc_u );
725
726 if ( n_ahead > 0 )
727 {
728 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
729 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
731 n_ahead,
732 a12t, cs_A,
733 y21, rs_Y );
736 m_ahead,
737 n_ahead,
738 buff_1,
739 A22, rs_A, cs_A,
740 u21, inc_u,
741 buff_1,
742 y21, rs_Y );
743
744 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
745 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
748 m_ahead,
749 n_behind,
750 buff_1,
751 A20, rs_A, cs_A,
752 u21, inc_u,
753 buff_0,
754 d0, inc_d );
757 m_ahead,
758 n_behind,
759 buff_1,
760 Z20, rs_Z, cs_Z,
761 u21, inc_u,
762 buff_0,
763 e0, inc_e );
764
765 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
766 // FLA_Axpy( FLA_ONE, d0, t01 );
768 n_behind,
769 a10t, cs_A,
770 t01, rs_T );
772 n_behind,
773 buff_1,
774 d0, inc_d,
775 t01, rs_T );
776
777 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
778 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
781 n_ahead,
782 n_behind,
783 buff_m1,
784 Y20, rs_Y, cs_Y,
785 d0, inc_d,
786 buff_1,
787 y21, rs_Y );
790 m_behind,
791 n_ahead,
792 buff_m1,
793 A02, rs_A, cs_A,
794 e0, inc_e,
795 buff_1,
796 y21, rs_Y );
797
798 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
800 n_ahead,
801 tau11,
802 y21, rs_Y );
803
804 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
806 n_ahead,
807 buff_m1,
808 y21, rs_Y,
809 a12t, cs_A );
810
811 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
813 a12t_l,
814 a12t_r, cs_A,
815 sigma11 );
816
817 // FLA_Set( FLA_ONE, v21_t );
818 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
819 *v21_t = *buff_1;
821 n_ahead - 1,
822 a12t_r, cs_A,
823 v21_b, inc_v );
824
825 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
826 // FLA_Scal( FLA_MINUS_ONE, beta );
828 n_ahead,
829 y21, rs_Y,
830 v21, inc_v,
831 &beta );
833
834 // FLA_Copy( u21, z21 );
835 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
837 m_ahead,
838 u21, inc_u,
839 z21, rs_Z );
842 m_ahead,
843 n_ahead,
844 buff_1,
845 A22, rs_A, cs_A,
846 v21, inc_v,
847 &beta,
848 z21, rs_Z );
849
850 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
851 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
854 n_ahead,
855 m_behind,
856 buff_1,
857 Y20, rs_Y, cs_Y,
858 v21, inc_v,
859 buff_0,
860 f0, inc_f );
863 m_behind,
864 n_ahead,
865 buff_1,
866 A02, rs_A, cs_A,
867 v21, inc_v,
868 buff_0,
869 g0, inc_g );
870
871 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
872 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
875 m_ahead,
876 n_behind,
877 buff_m1,
878 A20, rs_A, cs_A,
879 f0, inc_f,
880 buff_1,
881 z21, rs_Z );
884 m_ahead,
885 n_behind,
886 buff_m1,
887 Z20, rs_Z, cs_Z,
888 g0, inc_g,
889 buff_1,
890 z21, rs_Z );
891
892 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
894 m_ahead,
895 sigma11,
896 z21, rs_Z );
897
898 // FLA_Copy( g0, s01 );
900 n_behind,
901 g0, inc_g,
902 s01, rs_S );
903 }
904 else // if ( n_ahead == 0 )
905 {
906 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
907 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
908 bl1_dcopyv( BLIS1_CONJUGATE,
909 n_behind,
910 a10t, cs_A,
911 t01, rs_T );
912 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
913 BLIS1_NO_CONJUGATE,
914 m_ahead,
915 n_behind,
916 buff_1,
917 A20, rs_A, cs_A,
918 u21, inc_u,
919 buff_1,
920 t01, rs_T );
921 }
922
923 /*------------------------------------------------------------*/
924
925 }
926
927 // FLA_Obj_free( &u );
928 // FLA_Obj_free( &v );
929 // FLA_Obj_free( &d );
930 // FLA_Obj_free( &e );
931 // FLA_Obj_free( &f );
932 // FLA_Obj_free( &g );
933 FLA_free( buff_u );
934 FLA_free( buff_v );
935 FLA_free( buff_d );
936 FLA_free( buff_e );
937 FLA_free( buff_f );
938 FLA_free( buff_g );
939
940 return FLA_SUCCESS;
941}

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

◆ FLA_Bidiag_UT_u_step_ops_var1()

FLA_Error FLA_Bidiag_UT_u_step_ops_var1 ( int  m_A,
int  n_A,
int  m_TS,
float *buff_A,
int  rs_A,
int  cs_A,
float *buff_T,
int  rs_T,
int  cs_T,
float *buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123
124 int i;
125
126 // b_alg = FLA_Obj_length( T );
127 int b_alg = m_TS;
128
129 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
130 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
131 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
132 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
133 int inc_v = 1;
134
135 for ( i = 0; i < b_alg; ++i )
136 {
137 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
138 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
139 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
140 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
141 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
142 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
143 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
144
145 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
146 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
147
148 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
149 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
150
151 float* v21 = buff_v + (i+1)*inc_v;
152
153 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
154 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
155
156 float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
157 float* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
158
159 float* v21_t = v21 + (0 )*inc_v;
160 float* v21_b = v21 + (1 )*inc_v;
161
162 int m_ahead = m_A - i - 1;
163 int n_ahead = n_A - i - 1;
164 int m_behind = i;
165 int n_behind = i;
166
167 /*------------------------------------------------------------*/
168
169 // FLA_Househ2_UT( FLA_LEFT,
170 // alpha11,
171 // a21, tau11 );
172 FLA_Househ2_UT_l_ops( m_ahead,
173 alpha11,
174 a21, rs_A,
175 tau11 );
176
177 if ( n_ahead > 0 )
178 {
179 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
180 FLA_Apply_H2_UT_l_ops_var1( m_ahead,
181 n_ahead,
182 tau11,
183 a21, rs_A,
184 a12t, cs_A,
185 A22, rs_A, cs_A );
186
187 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
188 FLA_Househ2_UT_r_ops( n_ahead - 1,
189 a12t_l,
190 a12t_r, cs_A,
191 sigma11 );
192
193 // FLA_Set( FLA_ONE, v21_t );
194 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
195 *v21_t = *buff_1;
196 bl1_scopyv( BLIS1_NO_CONJUGATE,
197 n_ahead - 1,
198 a12t_r, cs_A,
199 v21_b, inc_v );
200
201 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
202 FLA_Apply_H2_UT_r_ops_var1( m_ahead,
203 n_ahead - 1,
204 sigma11,
205 v21_b, inc_v,
206 A22_l, rs_A,
207 A22_r, rs_A, cs_A );
208
209 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
210 bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
211 BLIS1_NO_CONJUGATE,
212 m_behind,
213 n_ahead,
214 buff_1,
215 A02, rs_A, cs_A,
216 v21, inc_v,
217 buff_0,
218 s01, rs_S );
219 }
220
221 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
222 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
223 bl1_scopyv( BLIS1_CONJUGATE,
224 n_behind,
225 a10t, cs_A,
226 t01, rs_T );
227 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
228 BLIS1_NO_CONJUGATE,
229 m_ahead,
230 n_behind,
231 buff_1,
232 A20, rs_A, cs_A,
233 a21, rs_A,
234 buff_1,
235 t01, rs_T );
236
237 /*------------------------------------------------------------*/
238
239 }
240
241 // FLA_Obj_free( &v );
242 FLA_free( buff_v );
243
244 return FLA_SUCCESS;
245}
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Apply_H2_UT_r_ops_var1(int n_u2h_A2, int m_a1, float *tau, float *u2h, int inc_u2h, float *a1, int inc_a1, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:108

References bl1_scopyv(), bl1_sgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_ops_var2()

FLA_Error FLA_Bidiag_UT_u_step_ops_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124
125 float beta;
126 int i;
127
128 // b_alg = FLA_Obj_length( T );
129 int b_alg = m_TS;
130
131 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135 float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137 int inc_v = 1;
138 int inc_y = 1;
139 int inc_z = 1;
140
141 for ( i = 0; i < b_alg; ++i )
142 {
143 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150
151 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153
154 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156
157 float* v21 = buff_v + (i+1)*inc_v;
158
159 float* y21 = buff_y + (i+1)*inc_y;
160
161 float* z21 = buff_z + (i+1)*inc_z;
162
163 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165
166 float* v21_t = v21 + (0 )*inc_v;
167 float* v21_b = v21 + (1 )*inc_v;
168
169 int m_ahead = m_A - i - 1;
170 int n_ahead = n_A - i - 1;
171 int m_behind = i;
172 int n_behind = i;
173
174 /*------------------------------------------------------------*/
175
176 // FLA_Househ2_UT( FLA_LEFT,
177 // alpha11,
178 // a21, tau11 );
180 alpha11,
181 a21, rs_A,
182 tau11 );
183
184 if ( n_ahead > 0 )
185 {
186 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
187 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
189 n_ahead,
190 a12t, cs_A,
191 y21, inc_y );
194 m_ahead,
195 n_ahead,
196 buff_1,
197 A22, rs_A, cs_A,
198 a21, rs_A,
199 buff_1,
200 y21, inc_y );
201
202 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
204 n_ahead,
205 tau11,
206 y21, inc_y );
207
208 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
210 n_ahead,
211 buff_m1,
212 y21, inc_y,
213 a12t, cs_A );
214
215 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
217 a12t_l,
218 a12t_r, cs_A,
219 sigma11 );
220
221 // FLA_Set( FLA_ONE, v21_t );
222 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223 *v21_t = *buff_1;
225 n_ahead - 1,
226 a12t_r, cs_A,
227 v21_b, inc_y );
228
229 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
230 // FLA_Scal( FLA_MINUS_ONE, beta );
232 n_ahead,
233 y21, inc_y,
234 v21, inc_v,
235 &beta );
236 bl1_sneg1( &beta );
237
238 // FLA_Copy( a21, z21 );
239 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
242 m_ahead,
243 a21, rs_A,
244 z21, inc_z );
247 m_ahead,
248 n_ahead,
249 buff_1,
250 A22, rs_A, cs_A,
251 v21, inc_v,
252 &beta,
253 z21, inc_z );
255 m_ahead,
256 sigma11,
257 z21, inc_z );
258
259 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
260 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
263 m_ahead,
264 n_ahead,
265 buff_m1,
266 a21, rs_A,
267 y21, inc_y,
268 A22, rs_A, cs_A );
271 m_ahead,
272 n_ahead,
273 buff_m1,
274 z21, inc_z,
275 v21, inc_v,
276 A22, rs_A, cs_A );
277
278 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
281 m_behind,
282 n_ahead,
283 buff_1,
284 A02, rs_A, cs_A,
285 v21, inc_v,
286 buff_0,
287 s01, rs_S );
288 }
289
290 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
291 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
293 n_behind,
294 a10t, cs_A,
295 t01, rs_T );
298 m_ahead,
299 n_behind,
300 buff_1,
301 A20, rs_A, cs_A,
302 a21, rs_A,
303 buff_1,
304 t01, rs_T );
305
306 /*------------------------------------------------------------*/
307
308 }
309
310 // FLA_Obj_free( &v );
311 // FLA_Obj_free( &y );
312 // FLA_Obj_free( &z );
313 FLA_free( buff_v );
314 FLA_free( buff_y );
315 FLA_free( buff_z );
316
317 return FLA_SUCCESS;
318}

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_ops_var3()

FLA_Error FLA_Bidiag_UT_u_step_ops_var3 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124
125 float alpha12;
126 float minus_conj_alpha12;
127 float psi11_minus_alpha12;
128 float minus_inv_tau11;
129 float minus_upsilon11;
130 float minus_conj_nu11;
131 float minus_conj_psi11;
132 float minus_zeta11;
133 float beta;
134 int i;
135
136 // b_alg = FLA_Obj_length( T );
137 int b_alg = m_TS;
138
139 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
140 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
141 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
142 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
143 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
144 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
145 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
146 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
147 float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
148 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149 float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
150 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
151 float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
152 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
153 int inc_w = 1;
154 int inc_ap = 1;
155 int inc_u = 1;
156 int inc_up = 1;
157 int inc_v = 1;
158 int inc_y = 1;
159 int inc_z = 1;
160
161 for ( i = 0; i < b_alg; ++i )
162 {
163 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170
171 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
172 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
173
174 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
175 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
176
177 float* w21 = buff_w + (i+1)*inc_w;
178
179 float* a12p = buff_ap + (i+1)*inc_ap;
180
181 float* upsilon11 = buff_u + (i )*inc_u;
182 float* u21 = buff_u + (i+1)*inc_u;
183
184 float* u21p = buff_up + (i+1)*inc_up;
185
186 float* nu11 = buff_v + (i )*inc_v;
187 float* v21 = buff_v + (i+1)*inc_v;
188
189 float* psi11 = buff_y + (i )*inc_y;
190 float* y21 = buff_y + (i+1)*inc_y;
191
192 float* zeta11 = buff_z + (i )*inc_z;
193 float* z21 = buff_z + (i+1)*inc_z;
194
195 float* a12p_t = a12p + (0 )*inc_ap;
196 float* a12p_b = a12p + (1 )*inc_ap;
197
198 float* v21_t = v21 + (0 )*inc_v;
199 float* v21_b = v21 + (1 )*inc_v;
200
201 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
202 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
203
204 float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
205
206 int m_ahead = m_A - i - 1;
207 int n_ahead = n_A - i - 1;
208 int m_behind = i;
209 int n_behind = i;
210
211 /*------------------------------------------------------------*/
212
213 if ( m_behind > 0 )
214 {
215 // FLA_Copy( upsilon11, minus_upsilon11 );
216 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
218
219 // FLA_Copy( zeta11, minus_zeta11 );
220 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
222
223 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
224 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
227
228 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
229 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
232
233 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
234 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
236 1,
238 upsilon11, 1,
239 alpha11, 1 );
241 1,
243 zeta11, 1,
244 alpha11, 1 );
245
246 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
247 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
249 m_ahead,
251 u21, inc_u,
252 a21, rs_A );
254 m_ahead,
256 z21, inc_z,
257 a21, rs_A );
258
259 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
260 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
262 n_ahead,
264 y21, inc_y,
265 a12t, cs_A );
267 n_ahead,
269 v21, inc_v,
270 a12t, cs_A );
271 }
272
273 // FLA_Househ2_UT( FLA_LEFT,
274 // alpha11,
275 // a21, tau11 );
276 // FLA_Copy( a21, u21p );
278 alpha11,
279 a21, rs_A,
280 tau11 );
282 m_ahead,
283 a21, rs_A,
284 u21p, inc_up );
285
286 if ( n_ahead > 0 )
287 {
288 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
289 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
291
292 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
293 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
295 n_ahead,
296 a12t, cs_A,
297 a12p, inc_ap );
299 n_ahead,
301 a12t, cs_A,
302 a12p, inc_ap );
303 }
304
305 if ( m_behind > 0 )
306 {
307 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
308 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
311 m_ahead,
312 n_ahead,
313 buff_m1,
314 u21, inc_u,
315 y21, inc_y,
316 A22, rs_A, cs_A );
319 m_ahead,
320 n_ahead,
321 buff_m1,
322 z21, inc_z,
323 v21, inc_v,
324 A22, rs_A, cs_A );
325 }
326
327 if ( n_ahead > 0 )
328 {
329 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
332 m_ahead,
333 n_ahead,
334 buff_1,
335 A22, rs_A, cs_A,
336 u21p, inc_up,
337 buff_0,
338 y21, inc_y );
339
340 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
342 n_ahead,
344 y21, inc_y,
345 a12p, inc_ap );
346
347 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
350 m_ahead,
351 n_ahead,
352 buff_1,
353 A22, rs_A, cs_A,
354 a12p, inc_ap,
355 buff_0,
356 w21, inc_w );
357
358 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
360 n_ahead,
361 buff_1,
362 a12t, cs_A,
363 y21, inc_y );
364
365 // FLA_Househ2s_UT( FLA_RIGHT,
366 // a12p_t,
367 // a12p_b,
368 // alpha12, psi11_minus_alpha12, sigma11 );
370 a12p_t,
371 a12p_b, inc_ap,
372 &alpha12,
374 sigma11 );
375
376 // FLA_Copy( a12p, v21 );
377 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
378 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
379 // FLA_Conjugate( v21_b );
381 n_ahead,
382 a12p, inc_ap,
383 v21, inc_v );
386 n_ahead,
388 v21, inc_v );
389 bl1_sconjv( n_ahead - 1,
390 v21_b, inc_v );
391
392 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
393 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
394 *a12t_l = alpha12;
396 n_ahead - 1,
397 v21_b, inc_v,
398 a12t_r, cs_A );
399 }
400
401 // FLA_Copy( u21p, u21 );
403 m_ahead,
404 u21p, inc_up,
405 u21, inc_u );
406
407 if ( n_ahead > 0 )
408 {
409 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
410 // FLA_Scal( FLA_MINUS_ONE, beta );
411 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
413 n_ahead,
414 y21, inc_y,
415 v21, inc_v,
416 &beta );
418
419 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
420 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
423
424 // FLA_Copy( w21, z21 );
425 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
426 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
427 // FLA_Axpy( beta, u21, z21 );
429 m_ahead,
430 w21, inc_w,
431 z21, inc_z );
433 m_ahead,
435 A22_l, rs_A,
436 z21, inc_z );
438 m_ahead,
440 z21, inc_z );
442 m_ahead,
443 &beta,
444 u21, inc_u,
445 z21, inc_z );
446
447 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
448 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
450 n_ahead,
451 tau11,
452 y21, inc_y );
454 m_ahead,
455 sigma11,
456 z21, inc_z );
457
458 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
461 m_behind,
462 n_ahead,
463 buff_1,
464 A02, rs_A, cs_A,
465 v21, inc_v,
466 buff_0,
467 s01, rs_S );
468 }
469
470 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
471 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
473 n_behind,
474 a10t, cs_A,
475 t01, rs_T );
478 m_ahead,
479 n_behind,
480 buff_1,
481 A20, rs_A, cs_A,
482 u21, inc_u,
483 buff_1,
484 t01, rs_T );
485
486 if ( m_behind + 1 == b_alg && n_ahead > 0 )
487 {
488 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
489 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
492 m_ahead,
493 n_ahead,
494 buff_m1,
495 u21, inc_u,
496 y21, inc_y,
497 A22, rs_A, cs_A );
500 m_ahead,
501 n_ahead,
502 buff_m1,
503 z21, inc_z,
504 v21, inc_v,
505 A22, rs_A, cs_A );
506 }
507
508 /*------------------------------------------------------------*/
509
510 }
511
512 // FLA_Obj_free( &w );
513 // FLA_Obj_free( &ap );
514 // FLA_Obj_free( &u );
515 // FLA_Obj_free( &up );
516 // FLA_Obj_free( &v );
517 // FLA_Obj_free( &y );
518 // FLA_Obj_free( &z );
519 FLA_free( buff_w );
520 FLA_free( buff_ap );
521 FLA_free( buff_u );
522 FLA_free( buff_up );
523 FLA_free( buff_v );
524 FLA_free( buff_y );
525 FLA_free( buff_z );
526
527 return FLA_SUCCESS;
528}

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

◆ FLA_Bidiag_UT_u_step_ops_var4()

FLA_Error FLA_Bidiag_UT_u_step_ops_var4 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
163{
164 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167
168 float alpha12;
169 float minus_conj_alpha12;
170 float psi11_minus_alpha12;
171 float minus_inv_tau11;
172 float beta;
173 float last_elem;
174 int i;
175
176 // b_alg = FLA_Obj_length( T );
177 int b_alg = m_TS;
178
179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
190 float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191 float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
192 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
193 float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
195 float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196 float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
198 float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
199 int inc_w = 1;
200 int inc_al = 1;
201 int inc_ap = 1;
202 int inc_u = 1;
203 int inc_up = 1;
204 int inc_v = 1;
205 int inc_d = 1;
206 int inc_e = 1;
207 int inc_f = 1;
208 int inc_g = 1;
209
210 // FLA_Set( FLA_ZERO, Y );
211 // FLA_Set( FLA_ZERO, Z );
212 bl1_ssetm( n_A,
213 b_alg,
214 buff_0,
215 buff_Y, rs_Y, cs_Y );
216 bl1_ssetm( m_A,
217 b_alg,
218 buff_0,
219 buff_Z, rs_Z, cs_Z );
220
221 for ( i = 0; i < b_alg; ++i )
222 {
223 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
224 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
225 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
226 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
227 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
228 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
229 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
230 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
231
232 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
233 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
234 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
235
236 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
237 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
238 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
239
240 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
241 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
242
243 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
244 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
245
246 float* w21 = buff_w + (i+1)*inc_w;
247
248 float* a22l = buff_al + (i+1)*inc_al;
249
250 float* a12p = buff_ap + (i+1)*inc_ap;
251
252 float* u21 = buff_u + (i+1)*inc_u;
253
254 float* u21p = buff_up + (i+1)*inc_up;
255
256 float* v21 = buff_v + (i+1)*inc_v;
257
258 float* d0 = buff_d + (0 )*inc_d;
259
260 float* e0 = buff_e + (0 )*inc_e;
261
262 float* f0 = buff_f + (0 )*inc_f;
263
264 float* g0 = buff_g + (0 )*inc_g;
265
266 float* a12p_t = a12p + (0 )*inc_ap;
267 float* a12p_b = a12p + (1 )*inc_ap;
268
269 float* v21_t = v21 + (0 )*inc_v;
270 float* v21_b = v21 + (1 )*inc_v;
271
272 float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
273
274 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
275 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
276
277 float* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
278
279 float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
280
281 float* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
282
283 float* ABL = a10t;
284 float* ZBL = z10t;
285
286 float* a2 = alpha11;
287
288 int m_ahead = m_A - i - 1;
289 int n_ahead = n_A - i - 1;
290 int m_behind = i;
291 int n_behind = i;
292
293 /*------------------------------------------------------------*/
294
295 if ( m_behind > 0 )
296 {
297 // FLA_Copy( a01_b, last_elem );
298 // FLA_Set( FLA_ONE, a01_b );
299 last_elem = *a01_b;
300 *a01_b = *buff_1;
301 }
302
303 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
304 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
307 m_ahead + 1,
308 n_behind,
309 buff_m1,
310 ABL, rs_A, cs_A,
311 y10t, cs_Y,
312 buff_1,
313 a2, rs_A );
316 m_ahead + 1,
317 n_behind,
318 buff_m1,
319 ZBL, rs_Z, cs_Z,
320 a01, rs_A,
321 buff_1,
322 a2, rs_A );
323
324 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
325 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
328 n_ahead,
329 n_behind,
330 buff_m1,
331 Y20, rs_Y, cs_Y,
332 a10t, cs_A,
333 buff_1,
334 a12t, cs_A );
337 m_behind,
338 n_ahead,
339 buff_m1,
340 A02, rs_A, cs_A,
341 z10t, cs_Z,
342 buff_1,
343 a12t, cs_A );
344
345 if ( m_behind > 0 )
346 {
347 // FLA_Copy( last_elem, a01_b );
348 *a01_b = last_elem;
349 }
350
351 // FLA_Househ2_UT( FLA_LEFT,
352 // alpha11,
353 // a21, tau11 );
354 // FLA_Copy( a21, u21p );
356 alpha11,
357 a21, rs_A,
358 tau11 );
360 m_ahead,
361 a21, rs_A,
362 u21p, inc_up );
363
364 if ( n_ahead > 0 )
365 {
366 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
367 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
369
370 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
371 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
373 n_ahead,
374 a12t, cs_A,
375 a12p, inc_ap );
377 n_ahead,
379 a12t, cs_A,
380 a12p, inc_ap );
381
382 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
383 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
386 m_ahead,
387 n_behind,
388 buff_1,
389 A20, rs_A, cs_A,
390 u21p, inc_up,
391 buff_0,
392 d0, inc_d );
395 m_ahead,
396 n_behind,
397 buff_1,
398 Z20, rs_Z, cs_Z,
399 u21p, inc_up,
400 buff_0,
401 e0, inc_e );
402
403 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
404 // FLA_Axpy( FLA_ONE, d0, t01 );
406 n_behind,
407 a10t, cs_A,
408 t01, rs_T );
410 n_behind,
411 buff_1,
412 d0, inc_d,
413 t01, rs_T );
414
415 // FLA_Set( FLA_ZERO, y21 );
416 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
417 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
419 buff_0,
420 y21, rs_Y );
423 n_ahead,
424 n_behind,
425 buff_m1,
426 Y20, rs_Y, cs_Y,
427 d0, inc_d,
428 buff_1,
429 y21, rs_Y );
432 m_behind,
433 n_ahead,
434 buff_m1,
435 A02, rs_A, cs_A,
436 e0, inc_e,
437 buff_1,
438 y21, rs_Y );
439
440 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
443 m_ahead,
444 n_ahead,
445 buff_1,
446 A22, rs_A, cs_A,
447 u21p, inc_up,
448 buff_1,
449 y21, rs_Y );
450
451 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
453 n_ahead,
455 y21, rs_Y,
456 a12p, inc_ap );
457
458 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
461 m_ahead,
462 n_ahead,
463 buff_1,
464 A22, rs_A, cs_A,
465 a12p, inc_ap,
466 buff_0,
467 w21, inc_w );
468
469 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
470 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
473 n_ahead,
474 n_behind,
475 buff_1,
476 Y20, rs_Y, cs_Y,
477 a12p, inc_ap,
478 buff_0,
479 f0, inc_f );
482 m_behind,
483 n_ahead,
484 buff_1,
485 A02, rs_A, cs_A,
486 a12p, inc_ap,
487 buff_0,
488 g0, inc_g );
489
490 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
491 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
494 m_ahead,
495 n_behind,
496 buff_m1,
497 A20, rs_A, cs_A,
498 f0, inc_f,
499 buff_1,
500 w21, inc_w );
503 m_ahead,
504 n_behind,
505 buff_m1,
506 Z20, rs_Z, cs_Z,
507 g0, inc_g,
508 buff_1,
509 w21, inc_w );
510
511 // FLA_Copy( A22_l, a22l );
512 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
513 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
515 m_ahead,
516 A22_l, rs_A,
517 a22l, inc_al );
520 m_ahead,
521 n_behind,
522 buff_m1,
523 A20, rs_A, cs_A,
524 Y20_t, cs_Y,
525 buff_1,
526 a22l, inc_al );
529 m_ahead,
530 n_behind,
531 buff_m1,
532 Z20, rs_Z, cs_Z,
533 A02_l, rs_A,
534 buff_1,
535 a22l, inc_al );
536
537 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
539 n_ahead,
540 buff_1,
541 a12t, cs_A,
542 y21, rs_Y );
543
544 // FLA_Househ2s_UT( FLA_RIGHT,
545 // a12p_t,
546 // a12p_b,
547 // alpha12, psi11_minus_alpha12, sigma11 );
549 a12p_t,
550 a12p_b, inc_ap,
551 &alpha12,
553 sigma11 );
554
555 // FLA_Copy( a12p, v21 );
556 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
557 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
558 // FLA_Conjugate( v21_b );
560 n_ahead,
561 a12p, inc_ap,
562 v21, inc_v );
565 n_ahead,
567 v21, inc_v );
568 bl1_sconjv( n_ahead - 1,
569 v21_b, inc_v );
570
571 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
572 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
575
576 // FLA_Copy( g0, s01 );
577 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
578 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
580 n_behind,
581 g0, inc_g,
582 s01, rs_S );
584 n_behind,
586 A02_l, rs_A,
587 s01, rs_S );
589 n_behind,
591 s01, rs_S );
592
593 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
594 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
595 *a12t_l = alpha12;
597 n_ahead - 1,
598 v21_b, inc_v,
599 a12t_r, cs_A );
600 }
601
602 // FLA_Copy( u21p, u21 );
604 m_ahead,
605 u21p, inc_up,
606 u21, inc_u );
607
608 if ( n_ahead > 0 )
609 {
610 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
611 // FLA_Scal( FLA_MINUS_ONE, beta );
612 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
614 n_ahead,
615 y21, rs_Y,
616 v21, inc_v,
617 &beta );
619
620 // FLA_Copy( w21, z21 );
621 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
622 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
623 // FLA_Axpy( beta, u21, z21 );
625 m_ahead,
626 w21, inc_w,
627 z21, rs_Z );
629 m_ahead,
631 a22l, inc_al,
632 z21, rs_Z );
634 m_ahead,
636 z21, rs_Z );
638 m_ahead,
639 &beta,
640 u21, inc_u,
641 z21, rs_Z );
642
643 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
644 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
646 n_ahead,
647 tau11,
648 y21, rs_Y );
650 m_ahead,
651 sigma11,
652 z21, rs_Z );
653 }
654 else // if ( n_ahead == 0 )
655 {
656 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
657 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
659 n_behind,
660 a10t, cs_A,
661 t01, rs_T );
664 m_ahead,
665 n_behind,
666 buff_1,
667 A20, rs_A, cs_A,
668 u21, inc_u,
669 buff_1,
670 t01, rs_T );
671 }
672
673 /*------------------------------------------------------------*/
674
675 }
676
677 // FLA_Obj_free( &w );
678 // FLA_Obj_free( &al );
679 // FLA_Obj_free( &ap );
680 // FLA_Obj_free( &u );
681 // FLA_Obj_free( &up );
682 // FLA_Obj_free( &v );
683 // FLA_Obj_free( &d );
684 // FLA_Obj_free( &e );
685 // FLA_Obj_free( &f );
686 // FLA_Obj_free( &g );
687 FLA_free( buff_w );
688 FLA_free( buff_al );
689 FLA_free( buff_ap );
690 FLA_free( buff_u );
691 FLA_free( buff_up );
692 FLA_free( buff_v );
693 FLA_free( buff_d );
694 FLA_free( buff_e );
695 FLA_free( buff_f );
696 FLA_free( buff_g );
697
698 return FLA_SUCCESS;
699}

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_ops_var5()

FLA_Error FLA_Bidiag_UT_u_step_ops_var5 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
// FLA_Bidiag_UT_u_step_ops_var5 -- body listing, float buffers.
//
// NOTE: this is a numbered listing with gaps (e.g. 166, 272-273, 281-282,
// 437): the omitted source lines are not visible here, so groups of lines
// that look like a bare argument list are the tail of a call whose opening
// line is missing. buff_m1 is used below but its declaration is not shown.
//
// What the visible lines do:
//  - b_alg = m_TS loop iterations are performed.
//  - Six work vectors come from FLA_malloc (u, f, g: m_A elements;
//    v, d, e: n_A elements) and are released with FLA_free before returning.
//    The FLA_malloc results are not checked in this function.
//  - Y (n_A x b_alg) and Z (m_A x b_alg) are filled from buff_0 (FLA_ZERO).
//  - The function always returns FLA_SUCCESS.
163{
164 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
167
168 float beta;
169 float last_elem;
170 int i;
171
172 // b_alg = FLA_Obj_length( T );
173 int b_alg = m_TS;
174
175 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
176 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
177 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
178 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
// Raw work vectors replacing the FLA_Obj temporaries named above; all use
// unit stride (inc_* = 1).
181 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
182 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
183 float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
184 float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
185 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
186 float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
187 int inc_u = 1;
188 int inc_v = 1;
189 int inc_d = 1;
190 int inc_e = 1;
191 int inc_f = 1;
192 int inc_g = 1;
193
194 // FLA_Set( FLA_ZERO, Y );
195 // FLA_Set( FLA_ZERO, Z );
196 bl1_ssetm( n_A,
197 b_alg,
198 buff_0,
199 buff_Y, rs_Y, cs_Y );
200 bl1_ssetm( m_A,
201 b_alg,
202 buff_0,
203 buff_Z, rs_Z, cs_Z );
204
205 for ( i = 0; i < b_alg; ++i )
206 {
// Pointers into A for iteration i, computed from the row/column strides:
// (row i, col 0), (row i+1, col 0), (row 0, col i), (i, i), (i+1, i),
// (0, i+1), (i, i+1) and (i+1, i+1) respectively.
207 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
208 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
209 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
210 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
211 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
212 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
213 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
214 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
215
216 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
217 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
218 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
219
220 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
221 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
222 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
223
224 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
225 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
226
227 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
228 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
229
230 float* u21 = buff_u + (i+1)*inc_u;
231
232 float* v21 = buff_v + (i+1)*inc_v;
233
234 float* d0 = buff_d + (0 )*inc_d;
235
236 float* e0 = buff_e + (0 )*inc_e;
237
238 float* f0 = buff_f + (0 )*inc_f;
239
240 float* g0 = buff_g + (0 )*inc_g;
241
242 float* v21_t = v21 + (0 )*inc_v;
243 float* v21_b = v21 + (1 )*inc_v;
244
// a01_b addresses row i-1 of column i. For i == 0 that offset is negative;
// the pointer is only dereferenced under the m_behind > 0 guards below.
245 float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
246
247 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
248 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
249
250 float* ABL = a10t;
251 float* ZBL = z10t;
252
253 float* a2 = alpha11;
254
255 int m_ahead = m_A - i - 1;
256 int n_ahead = n_A - i - 1;
257 int m_behind = i;
258 int n_behind = i;
259
260 /*------------------------------------------------------------*/
261
// Save *a01_b in last_elem and overwrite it with 1 for the updates that
// follow; the saved value is written back at listing line 315.
262 if ( m_behind > 0 )
263 {
264 // FLA_Copy( a01_b, last_elem );
265 // FLA_Set( FLA_ONE, a01_b );
266 last_elem = *a01_b;
267 *a01_b = *buff_1;
268 }
269
270 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
271 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
// NOTE(review): listing lines 272-273 and 281-282 are missing; presumably
// the openings of two bl1_sgemv calls -- confirm against the original source.
274 m_ahead + 1,
275 n_behind,
276 buff_m1,
277 ABL, rs_A, cs_A,
278 y10t, cs_Y,
279 buff_1,
280 a2, rs_A );
283 m_ahead + 1,
284 n_behind,
285 buff_m1,
286 ZBL, rs_Z, cs_Z,
287 a01, rs_A,
288 buff_1,
289 a2, rs_A );
290
291 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
292 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
295 n_ahead,
296 n_behind,
297 buff_m1,
298 Y20, rs_Y, cs_Y,
299 a10t, cs_A,
300 buff_1,
301 a12t, cs_A );
304 m_behind,
305 n_ahead,
306 buff_m1,
307 A02, rs_A, cs_A,
308 z10t, cs_Z,
309 buff_1,
310 a12t, cs_A );
311
// Undo the temporary overwrite made at listing line 267.
312 if ( m_behind > 0 )
313 {
314 // FLA_Copy( last_elem, a01_b );
315 *a01_b = last_elem;
316 }
317
318 // FLA_Househ2_UT( FLA_LEFT,
319 // alpha11,
320 // a21, tau11 );
321 // FLA_Copy( a21, u21 );
323 alpha11,
324 a21, rs_A,
325 tau11 );
327 m_ahead,
328 a21, rs_A,
329 u21, inc_u );
330
// Two paths: columns remain to the right of column i (n_ahead > 0), or
// this is the last column and only t01 is formed.
331 if ( n_ahead > 0 )
332 {
333 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
334 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
336 n_ahead,
337 a12t, cs_A,
338 y21, rs_Y );
341 m_ahead,
342 n_ahead,
343 buff_1,
344 A22, rs_A, cs_A,
345 u21, inc_u,
346 buff_1,
347 y21, rs_Y );
348
349 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
350 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
353 m_ahead,
354 n_behind,
355 buff_1,
356 A20, rs_A, cs_A,
357 u21, inc_u,
358 buff_0,
359 d0, inc_d );
362 m_ahead,
363 n_behind,
364 buff_1,
365 Z20, rs_Z, cs_Z,
366 u21, inc_u,
367 buff_0,
368 e0, inc_e );
369
370 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
371 // FLA_Axpy( FLA_ONE, d0, t01 );
373 n_behind,
374 a10t, cs_A,
375 t01, rs_T );
377 n_behind,
378 buff_1,
379 d0, inc_d,
380 t01, rs_T );
381
382 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
383 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
386 n_ahead,
387 n_behind,
388 buff_m1,
389 Y20, rs_Y, cs_Y,
390 d0, inc_d,
391 buff_1,
392 y21, rs_Y );
395 m_behind,
396 n_ahead,
397 buff_m1,
398 A02, rs_A, cs_A,
399 e0, inc_e,
400 buff_1,
401 y21, rs_Y );
402
403 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
405 n_ahead,
406 tau11,
407 y21, rs_Y );
408
409 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
411 n_ahead,
412 buff_m1,
413 y21, rs_Y,
414 a12t, cs_A );
415
416 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
418 a12t_l,
419 a12t_r, cs_A,
420 sigma11 );
421
422 // FLA_Set( FLA_ONE, v21_t );
423 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
424 *v21_t = *buff_1;
426 n_ahead - 1,
427 a12t_r, cs_A,
428 v21_b, inc_v );
429
430 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
431 // FLA_Scal( FLA_MINUS_ONE, beta );
433 n_ahead,
434 y21, rs_Y,
435 v21, inc_v,
436 &beta );
// NOTE(review): listing line 437 is missing; the FLA_Scal comment above
// suggests beta is negated there -- confirm against the original source.
438
439 // FLA_Copy( u21, z21 );
440 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
442 m_ahead,
443 u21, inc_u,
444 z21, rs_Z );
447 m_ahead,
448 n_ahead,
449 buff_1,
450 A22, rs_A, cs_A,
451 v21, inc_v,
452 &beta,
453 z21, rs_Z );
454
455 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
456 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
459 n_ahead,
460 m_behind,
461 buff_1,
462 Y20, rs_Y, cs_Y,
463 v21, inc_v,
464 buff_0,
465 f0, inc_f );
468 m_behind,
469 n_ahead,
470 buff_1,
471 A02, rs_A, cs_A,
472 v21, inc_v,
473 buff_0,
474 g0, inc_g );
475
476 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
477 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
480 m_ahead,
481 n_behind,
482 buff_m1,
483 A20, rs_A, cs_A,
484 f0, inc_f,
485 buff_1,
486 z21, rs_Z );
489 m_ahead,
490 n_behind,
491 buff_m1,
492 Z20, rs_Z, cs_Z,
493 g0, inc_g,
494 buff_1,
495 z21, rs_Z );
496
497 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
499 m_ahead,
500 sigma11,
501 z21, rs_Z );
502
503 // FLA_Copy( g0, s01 );
505 n_behind,
506 g0, inc_g,
507 s01, rs_S );
508 }
509 else // if ( n_ahead == 0 )
510 {
511 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
512 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
514 n_behind,
515 a10t, cs_A,
516 t01, rs_T );
519 m_ahead,
520 n_behind,
521 buff_1,
522 A20, rs_A, cs_A,
523 u21, inc_u,
524 buff_1,
525 t01, rs_T );
526 }
527
528 /*------------------------------------------------------------*/
529
530 }
531
532 // FLA_Obj_free( &u );
533 // FLA_Obj_free( &v );
534 // FLA_Obj_free( &d );
535 // FLA_Obj_free( &e );
536 // FLA_Obj_free( &f );
537 // FLA_Obj_free( &g );
// Release the six work vectors allocated at listing lines 181-186.
538 FLA_free( buff_u );
539 FLA_free( buff_v );
540 FLA_free( buff_d );
541 FLA_free( buff_e );
542 FLA_free( buff_f );
543 FLA_free( buff_g );
544
545 return FLA_SUCCESS;
546}

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

◆ FLA_Bidiag_UT_u_step_opt_var1()

FLA_Error FLA_Bidiag_UT_u_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
// FLA_Bidiag_UT_u_step_opt_var1 -- body listing, datatype dispatcher.
//
// Reads the datatype, length and width of A, then switches on the datatype.
// Each case fetches typed buffer pointers and passes n_A, m_TS and the
// buffer/stride triples to a type-specific routine. Returns FLA_SUCCESS on
// every path; there is no default branch, so an unmatched datatype does
// nothing and still reports success.
//
// NOTE: this is a numbered listing with gaps (30, 32-33, 35-36, 38-39, 50,
// 53-54, ...). m_TS and the rs_*/cs_* strides are used but their assignments
// are not shown, and the opening line of each call is missing.
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
// NOTE(review): listing lines 30-39 are mostly missing; presumably m_TS and
// the row/column strides of A, T and S are set here -- confirm against source.
31
34
37
40
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
51 n_A,
52 m_TS,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
67 n_A,
68 m_TS,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
// NOTE(review): listing lines 78-80 (presumably the buffer pointer
// declarations for this case) are missing.
81
83 n_A,
84 m_TS,
87 buff_S, rs_S, cs_S );
88
89 break;
90 }
91
// NOTE(review): listing line 92 is missing; presumably the case label for
// the double-precision complex datatype -- confirm against source.
93 {
97
99 n_A,
100 m_TS,
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_opz_var1(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:519
FLA_Error FLA_Bidiag_UT_u_step_opc_var1(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:384
FLA_Error FLA_Bidiag_UT_u_step_opd_var1(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:249
FLA_Error FLA_Bidiag_UT_u_step_ops_var1(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:114

References FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var1(), and FLA_Bidiag_UT_u_opt_var1().

◆ FLA_Bidiag_UT_u_step_opt_var2()

FLA_Error FLA_Bidiag_UT_u_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
// FLA_Bidiag_UT_u_step_opt_var2 -- body listing, datatype dispatcher.
//
// Reads the datatype, length and width of A, then switches on the datatype.
// Each case fetches typed buffer pointers and passes n_A, m_TS and the
// buffer/stride triples to a type-specific routine. Returns FLA_SUCCESS on
// every path; there is no default branch, so an unmatched datatype does
// nothing and still reports success.
//
// NOTE: this is a numbered listing with gaps (30, 32-33, 35-36, 38-39, 50,
// 53-54, ...). m_TS and the rs_*/cs_* strides are used but their assignments
// are not shown, and the opening line of each call is missing.
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
// NOTE(review): listing lines 30-39 are mostly missing; presumably m_TS and
// the row/column strides of A, T and S are set here -- confirm against source.
31
34
37
40
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
51 n_A,
52 m_TS,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
67 n_A,
68 m_TS,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
// NOTE(review): listing lines 78-80 (presumably the buffer pointer
// declarations for this case) are missing.
81
83 n_A,
84 m_TS,
87 buff_S, rs_S, cs_S );
88
89 break;
90 }
91
// NOTE(review): listing line 92 is missing; presumably the case label for
// the double-precision complex datatype -- confirm against source.
93 {
97
99 n_A,
100 m_TS,
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_opz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:738
FLA_Error FLA_Bidiag_UT_u_step_opc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:530
FLA_Error FLA_Bidiag_UT_u_step_opd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:322
FLA_Error FLA_Bidiag_UT_u_step_ops_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:114

References FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var2(), and FLA_Bidiag_UT_u_opt_var2().

◆ FLA_Bidiag_UT_u_step_opt_var3()

FLA_Error FLA_Bidiag_UT_u_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
// FLA_Bidiag_UT_u_step_opt_var3 -- body listing, datatype dispatcher.
//
// Reads the datatype, length and width of A, then switches on the datatype.
// Each case fetches typed buffer pointers and passes n_A, m_TS and the
// buffer/stride triples to a type-specific routine. Returns FLA_SUCCESS on
// every path; there is no default branch, so an unmatched datatype does
// nothing and still reports success.
//
// NOTE: this is a numbered listing with gaps (30, 32-33, 35-36, 38-39, 50,
// 53-54, ...). m_TS and the rs_*/cs_* strides are used but their assignments
// are not shown, and the opening line of each call is missing.
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
// NOTE(review): listing lines 30-39 are mostly missing; presumably m_TS and
// the row/column strides of A, T and S are set here -- confirm against source.
31
34
37
40
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
51 n_A,
52 m_TS,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
67 n_A,
68 m_TS,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
// NOTE(review): listing lines 78-80 (presumably the buffer pointer
// declarations for this case) are missing.
81
83 n_A,
84 m_TS,
87 buff_S, rs_S, cs_S );
88
89 break;
90 }
91
// NOTE(review): listing line 92 is missing; presumably the case label for
// the double-precision complex datatype -- confirm against source.
93 {
97
99 n_A,
100 m_TS,
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_opz_var3(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var3.c:1368
FLA_Error FLA_Bidiag_UT_u_step_ops_var3(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var3.c:114
FLA_Error FLA_Bidiag_UT_u_step_opd_var3(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var3.c:532
FLA_Error FLA_Bidiag_UT_u_step_opc_var3(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var3.c:950

References FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var3(), and FLA_Bidiag_UT_u_opt_var3().

◆ FLA_Bidiag_UT_u_step_opt_var4()

FLA_Error FLA_Bidiag_UT_u_step_opt_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)
// FLA_Bidiag_UT_u_step_opt_var4 -- body listing, datatype dispatcher.
//
// Reads the datatype, length and width of A, then switches on the datatype.
// Each case fetches typed buffer pointers for A, Y, Z, T and S and passes
// n_A, m_TS and the buffer/stride triples to a type-specific routine.
// Returns FLA_SUCCESS on every path; there is no default branch, so an
// unmatched datatype does nothing and still reports success.
//
// NOTE: this is a numbered listing with gaps (49, 51-52, 54-55, 57-58, 60-61,
// 63-64, 77, 80-83, ...). m_TS and the rs_*/cs_* strides are used but their
// assignments are not shown, and the opening line of each call is missing.
36{
37 FLA_Datatype datatype;
38 int m_A, n_A, m_TS;
39 int rs_A, cs_A;
40 int rs_Y, cs_Y;
41 int rs_Z, cs_Z;
42 int rs_T, cs_T;
43 int rs_S, cs_S;
44
45 datatype = FLA_Obj_datatype( A );
46
47 m_A = FLA_Obj_length( A );
48 n_A = FLA_Obj_width( A );
// NOTE(review): listing lines 49-64 are mostly missing; presumably m_TS and
// the row/column strides of A, Y, Z, T and S are set here -- confirm.
50
53
56
59
62
65
66
67 switch ( datatype )
68 {
69 case FLA_FLOAT:
70 {
71 float* buff_A = FLA_FLOAT_PTR( A );
72 float* buff_Y = FLA_FLOAT_PTR( Y );
73 float* buff_Z = FLA_FLOAT_PTR( Z );
74 float* buff_T = FLA_FLOAT_PTR( T );
75 float* buff_S = FLA_FLOAT_PTR( S );
76
78 n_A,
79 m_TS,
84 buff_S, rs_S, cs_S );
85
86 break;
87 }
88
89 case FLA_DOUBLE:
90 {
91 double* buff_A = FLA_DOUBLE_PTR( A );
92 double* buff_Y = FLA_DOUBLE_PTR( Y );
93 double* buff_Z = FLA_DOUBLE_PTR( Z );
94 double* buff_T = FLA_DOUBLE_PTR( T );
95 double* buff_S = FLA_DOUBLE_PTR( S );
96
98 n_A,
99 m_TS,
100 buff_A, rs_A, cs_A,
101 buff_Y, rs_Y, cs_Y,
102 buff_Z, rs_Z, cs_Z,
103 buff_T, rs_T, cs_T,
104 buff_S, rs_S, cs_S );
105
106 break;
107 }
108
109 case FLA_COMPLEX:
110 {
// NOTE(review): listing lines 111-115 (presumably the buffer pointer
// declarations for this case) are missing.
116
118 n_A,
119 m_TS,
120 buff_A, rs_A, cs_A,
121 buff_Y, rs_Y, cs_Y,
122 buff_Z, rs_Z, cs_Z,
123 buff_T, rs_T, cs_T,
124 buff_S, rs_S, cs_S );
125
126 break;
127 }
128
// NOTE(review): listing line 129 is missing; presumably the case label for
// the double-precision complex datatype -- confirm against source.
130 {
136
138 n_A,
139 m_TS,
140 buff_A, rs_A, cs_A,
141 buff_Y, rs_Y, cs_Y,
142 buff_Z, rs_Z, cs_Z,
143 buff_T, rs_T, cs_T,
144 buff_S, rs_S, cs_S );
145
146 break;
147 }
148 }
149
150 return FLA_SUCCESS;
151}
FLA_Error FLA_Bidiag_UT_u_step_ops_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:155
FLA_Error FLA_Bidiag_UT_u_step_opz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:1799
FLA_Error FLA_Bidiag_UT_u_step_opd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:703
FLA_Error FLA_Bidiag_UT_u_step_opc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:1251

References FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var4(), and FLA_Bidiag_UT_u_opt_var4().

◆ FLA_Bidiag_UT_u_step_opt_var5()

FLA_Error FLA_Bidiag_UT_u_step_opt_var5 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)
// FLA_Bidiag_UT_u_step_opt_var5 -- body listing, datatype dispatcher.
//
// Reads the datatype, length and width of A, then switches on the datatype.
// Each case fetches typed buffer pointers for A, Y, Z, T and S and passes
// n_A, m_TS and the buffer/stride triples to a type-specific routine.
// Returns FLA_SUCCESS on every path; there is no default branch, so an
// unmatched datatype does nothing and still reports success.
//
// NOTE: this is a numbered listing with gaps (49, 51-52, 54-55, 57-58, 60-61,
// 63-64, 77, 80-83, ...). m_TS and the rs_*/cs_* strides are used but their
// assignments are not shown, and the opening line of each call is missing.
36{
37 FLA_Datatype datatype;
38 int m_A, n_A, m_TS;
39 int rs_A, cs_A;
40 int rs_Y, cs_Y;
41 int rs_Z, cs_Z;
42 int rs_T, cs_T;
43 int rs_S, cs_S;
44
45 datatype = FLA_Obj_datatype( A );
46
47 m_A = FLA_Obj_length( A );
48 n_A = FLA_Obj_width( A );
// NOTE(review): listing lines 49-64 are mostly missing; presumably m_TS and
// the row/column strides of A, Y, Z, T and S are set here -- confirm.
50
53
56
59
62
65
66
67 switch ( datatype )
68 {
69 case FLA_FLOAT:
70 {
71 float* buff_A = FLA_FLOAT_PTR( A );
72 float* buff_Y = FLA_FLOAT_PTR( Y );
73 float* buff_Z = FLA_FLOAT_PTR( Z );
74 float* buff_T = FLA_FLOAT_PTR( T );
75 float* buff_S = FLA_FLOAT_PTR( S );
76
78 n_A,
79 m_TS,
84 buff_S, rs_S, cs_S );
85
86 break;
87 }
88
89 case FLA_DOUBLE:
90 {
91 double* buff_A = FLA_DOUBLE_PTR( A );
92 double* buff_Y = FLA_DOUBLE_PTR( Y );
93 double* buff_Z = FLA_DOUBLE_PTR( Z );
94 double* buff_T = FLA_DOUBLE_PTR( T );
95 double* buff_S = FLA_DOUBLE_PTR( S );
96
98 n_A,
99 m_TS,
100 buff_A, rs_A, cs_A,
101 buff_Y, rs_Y, cs_Y,
102 buff_Z, rs_Z, cs_Z,
103 buff_T, rs_T, cs_T,
104 buff_S, rs_S, cs_S );
105
106 break;
107 }
108
109 case FLA_COMPLEX:
110 {
// NOTE(review): listing lines 111-115 (presumably the buffer pointer
// declarations for this case) are missing.
116
118 n_A,
119 m_TS,
120 buff_A, rs_A, cs_A,
121 buff_Y, rs_Y, cs_Y,
122 buff_Z, rs_Z, cs_Z,
123 buff_T, rs_T, cs_T,
124 buff_S, rs_S, cs_S );
125
126 break;
127 }
128
// NOTE(review): listing line 129 is missing; presumably the case label for
// the double-precision complex datatype -- confirm against source.
130 {
136
138 n_A,
139 m_TS,
140 buff_A, rs_A, cs_A,
141 buff_Y, rs_Y, cs_Y,
142 buff_Z, rs_Z, cs_Z,
143 buff_T, rs_T, cs_T,
144 buff_S, rs_S, cs_S );
145
146 break;
147 }
148 }
149
150 return FLA_SUCCESS;
151}
FLA_Error FLA_Bidiag_UT_u_step_opc_var5(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var5.c:945
FLA_Error FLA_Bidiag_UT_u_step_opd_var5(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var5.c:550
FLA_Error FLA_Bidiag_UT_u_step_ops_var5(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var5.c:155
FLA_Error FLA_Bidiag_UT_u_step_opz_var5(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var5.c:1340

References FLA_Bidiag_UT_u_step_opc_var5(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var5(), and FLA_Bidiag_UT_u_opt_var5().

◆ FLA_Bidiag_UT_u_step_opz_var1()

FLA_Error FLA_Bidiag_UT_u_step_opz_var1 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
// FLA_Bidiag_UT_u_step_opz_var1 -- body listing, dcomplex buffers.
//
// NOTE: this is a numbered listing with gaps (526-527, 577, 585, 593, 601,
// 607, 615-616, 628, 632-633): the omitted source lines are not visible, so
// bare argument lists are the tail of a call whose opening line is missing.
// buff_1 and buff_0 are used below but their declarations are not shown.
//
// What the visible lines do:
//  - b_alg = m_TS loop iterations are performed.
//  - One work vector v of n_A elements comes from FLA_malloc and is released
//    with FLA_free before returning (the allocation result is not checked).
//  - The function always returns FLA_SUCCESS.
525{
528
529 int i;
530
531 // b_alg = FLA_Obj_length( T );
532 int b_alg = m_TS;
533
534 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
535 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
536 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
// Only v is allocated in this listing; no y or z buffer appears despite the
// comments above.
537 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
538 int inc_v = 1;
539
540 for ( i = 0; i < b_alg; ++i )
541 {
// Pointers into A, T and S for iteration i, computed from the strides.
542 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
543 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
544 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
545 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
546 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
547 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
548 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
549
550 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
551 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
552
553 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
554 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
555
556 dcomplex* v21 = buff_v + (i+1)*inc_v;
557
558 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
559 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
560
561 dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
562 dcomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
563
564 dcomplex* v21_t = v21 + (0 )*inc_v;
565 dcomplex* v21_b = v21 + (1 )*inc_v;
566
567 int m_ahead = m_A - i - 1;
568 int n_ahead = n_A - i - 1;
569 int m_behind = i;
570 int n_behind = i;
571
572 /*------------------------------------------------------------*/
573
574 // FLA_Househ2_UT( FLA_LEFT,
575 // alpha11,
576 // a21, tau11 );
578 alpha11,
579 a21, rs_A,
580 tau11 );
581
// Everything touching the columns right of column i is skipped on the last
// column (n_ahead == 0); the t01 update after this block always runs.
582 if ( n_ahead > 0 )
583 {
584 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
586 n_ahead,
587 tau11,
588 a21, rs_A,
589 a12t, cs_A,
590 A22, rs_A, cs_A );
591
592 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
594 a12t_l,
595 a12t_r, cs_A,
596 sigma11 );
597
598 // FLA_Set( FLA_ONE, v21_t );
599 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
600 *v21_t = *buff_1;
602 n_ahead - 1,
603 a12t_r, cs_A,
604 v21_b, inc_v );
605
606 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
608 n_ahead - 1,
609 sigma11,
610 v21_b, inc_v,
611 A22_l, rs_A,
612 A22_r, rs_A, cs_A );
613
614 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
617 m_behind,
618 n_ahead,
619 buff_1,
620 A02, rs_A, cs_A,
621 v21, inc_v,
622 buff_0,
623 s01, rs_S );
624 }
625
626 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
627 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
629 n_behind,
630 a10t, cs_A,
631 t01, rs_T );
634 m_ahead,
635 n_behind,
636 buff_1,
637 A20, rs_A, cs_A,
638 a21, rs_A,
639 buff_1,
640 t01, rs_T );
641
642 /*------------------------------------------------------------*/
643
644 }
645
646 // FLA_Obj_free( &v );
647 FLA_free( buff_v );
648
649 return FLA_SUCCESS;
650}
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:343
FLA_Error FLA_Apply_H2_UT_r_opz_var1(int n_u2h_A2, int m_a1, dcomplex *tau, dcomplex *u2h, int inc_u2h, dcomplex *a1, int inc_a1, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:327

References bl1_zcopyv(), bl1_zgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_opz_var2()

FLA_Error FLA_Bidiag_UT_u_step_opz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
// FLA_Bidiag_UT_u_step_opz_var2 -- body listing, dcomplex buffers.
//
// NOTE: this is a numbered listing with gaps (745-747, 749, 803, 812,
// 816-817, 827, 833, 840, 848, 855, ...): the omitted source lines are not
// visible, so bare argument lists are the tail of a call whose opening line
// is missing. buff_1, buff_0, buff_m1 and beta are used below but their
// declarations are not shown.
//
// What the visible lines do:
//  - b_alg = m_TS loop iterations are performed.
//  - Three work vectors come from FLA_malloc (v, y: n_A elements;
//    z: m_A elements) and are released with FLA_free before returning.
//    The allocation results are not checked in this function.
//  - The function always returns FLA_SUCCESS.
744{
748
750 int i;
751
752 // b_alg = FLA_Obj_length( T );
753 int b_alg = m_TS;
754
755 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
756 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
757 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
758 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
759 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
760 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
761 int inc_v = 1;
762 int inc_y = 1;
763 int inc_z = 1;
764
765 for ( i = 0; i < b_alg; ++i )
766 {
// Pointers into A, T, S and the work vectors for iteration i.
767 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
768 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
769 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
770 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
771 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
772 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
773 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
774
775 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
776 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
777
778 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
779 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
780
781 dcomplex* v21 = buff_v + (i+1)*inc_v;
782
783 dcomplex* y21 = buff_y + (i+1)*inc_y;
784
785 dcomplex* z21 = buff_z + (i+1)*inc_z;
786
787 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
788 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
789
790 dcomplex* v21_t = v21 + (0 )*inc_v;
791 dcomplex* v21_b = v21 + (1 )*inc_v;
792
793 int m_ahead = m_A - i - 1;
794 int n_ahead = n_A - i - 1;
795 int m_behind = i;
796 int n_behind = i;
797
798 /*------------------------------------------------------------*/
799
800 // FLA_Househ2_UT( FLA_LEFT,
801 // alpha11,
802 // a21, tau11 );
804 alpha11,
805 a21, rs_A,
806 tau11 );
807
// Everything touching the columns right of column i is skipped on the last
// column (n_ahead == 0); the t01 update after this block always runs.
808 if ( n_ahead > 0 )
809 {
810 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
811 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
813 n_ahead,
814 a12t, cs_A,
815 y21, inc_y );
818 m_ahead,
819 n_ahead,
820 buff_1,
821 A22, rs_A, cs_A,
822 a21, rs_A,
823 buff_1,
824 y21, inc_y );
825
826 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
828 n_ahead,
829 tau11,
830 y21, inc_y );
831
832 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
834 n_ahead,
835 buff_m1,
836 y21, inc_y,
837 a12t, cs_A );
838
839 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
841 a12t_l,
842 a12t_r, cs_A,
843 sigma11 );
844
845 // FLA_Set( FLA_ONE, v21_t );
846 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
847 *v21_t = *buff_1;
// NOTE(review): the destination v21_b lives in buff_v, yet the stride passed
// at listing line 851 is inc_y rather than inc_v. Both are 1 here, so the
// result is the same, but inc_v looks like the intended argument.
849 n_ahead - 1,
850 a12t_r, cs_A,
851 v21_b, inc_y );
852
853 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
854 // FLA_Scal( FLA_MINUS_ONE, beta );
856 n_ahead,
857 y21, inc_y,
858 v21, inc_v,
859 &beta );
860 bl1_zneg1( &beta );
861
862 // FLA_Copy( a21, z21 );
863 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
864 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
866 m_ahead,
867 a21, rs_A,
868 z21, inc_z );
871 m_ahead,
872 n_ahead,
873 buff_1,
874 A22, rs_A, cs_A,
875 v21, inc_v,
876 &beta,
877 z21, inc_z );
879 m_ahead,
880 sigma11,
881 z21, inc_z );
882
883 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
884 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
887 m_ahead,
888 n_ahead,
889 buff_m1,
890 a21, rs_A,
891 y21, inc_y,
892 A22, rs_A, cs_A );
895 m_ahead,
896 n_ahead,
897 buff_m1,
898 z21, inc_z,
899 v21, inc_v,
900 A22, rs_A, cs_A );
901
902 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
905 m_behind,
906 n_ahead,
907 buff_1,
908 A02, rs_A, cs_A,
909 v21, inc_v,
910 buff_0,
911 s01, rs_S );
912 }
913
914 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
915 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
917 n_behind,
918 a10t, cs_A,
919 t01, rs_T );
922 m_ahead,
923 n_behind,
924 buff_1,
925 A20, rs_A, cs_A,
926 a21, rs_A,
927 buff_1,
928 t01, rs_T );
929
930 /*------------------------------------------------------------*/
931
932 }
933
934 // FLA_Obj_free( &v );
935 // FLA_Obj_free( &y );
936 // FLA_Obj_free( &z );
// Release the three work vectors allocated at listing lines 758-760.
937 FLA_free( buff_v );
938 FLA_free( buff_y );
939 FLA_free( buff_z );
940
941 return FLA_SUCCESS;
942}

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_opz_var3()

FLA_Error FLA_Bidiag_UT_u_step_opz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
1374{
1378
1387 dcomplex beta;
1388 int i;
1389
1390 // b_alg = FLA_Obj_length( T );
1391 int b_alg = m_TS;
1392
1393 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1394 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1395 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1396 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1397 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1398 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
1399 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1400 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1401 dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1402 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1403 dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1404 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1405 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1406 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1407 int inc_w = 1;
1408 int inc_ap = 1;
1409 int inc_u = 1;
1410 int inc_up = 1;
1411 int inc_v = 1;
1412 int inc_y = 1;
1413 int inc_z = 1;
1414
1415 for ( i = 0; i < b_alg; ++i )
1416 {
1417 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1418 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1419 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1420 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1421 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1422 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1423 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1424
1425 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1426 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1427
1428 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1429 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1430
1431 dcomplex* w21 = buff_w + (i+1)*inc_w;
1432
1433 dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1434
1436 dcomplex* u21 = buff_u + (i+1)*inc_u;
1437
1438 dcomplex* u21p = buff_up + (i+1)*inc_up;
1439
1440 dcomplex* nu11 = buff_v + (i )*inc_v;
1441 dcomplex* v21 = buff_v + (i+1)*inc_v;
1442
1443 dcomplex* psi11 = buff_y + (i )*inc_y;
1444 dcomplex* y21 = buff_y + (i+1)*inc_y;
1445
1446 dcomplex* zeta11 = buff_z + (i )*inc_z;
1447 dcomplex* z21 = buff_z + (i+1)*inc_z;
1448
1449 dcomplex* a12p_t = a12p + (0 )*inc_ap;
1450 dcomplex* a12p_b = a12p + (1 )*inc_ap;
1451
1452 dcomplex* v21_t = v21 + (0 )*inc_v;
1453 dcomplex* v21_b = v21 + (1 )*inc_v;
1454
1455 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1456 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1457
1458 dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1459
1460 int m_ahead = m_A - i - 1;
1461 int n_ahead = n_A - i - 1;
1462 int m_behind = i;
1463 int n_behind = i;
1464
1465 /*------------------------------------------------------------*/
1466
1467 if ( m_behind > 0 )
1468 {
1469 // FLA_Copy( upsilon11, minus_upsilon11 );
1470 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1472
1473 // FLA_Copy( zeta11, minus_zeta11 );
1474 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1476
1477 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1478 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1481
1482 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1483 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1486
1487 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1488 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1490 1,
1492 upsilon11, 1,
1493 alpha11, 1 );
1495 1,
1497 zeta11, 1,
1498 alpha11, 1 );
1499
1500 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1501 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1503 m_ahead,
1505 u21, inc_u,
1506 a21, rs_A );
1508 m_ahead,
1510 z21, inc_z,
1511 a21, rs_A );
1512
1513 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1514 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1516 n_ahead,
1518 y21, inc_y,
1519 a12t, cs_A );
1521 n_ahead,
1522 &minus_zeta11,
1523 v21, inc_v,
1524 a12t, cs_A );
1525 }
1526
1527 // FLA_Househ2_UT( FLA_LEFT,
1528 // alpha11,
1529 // a21, tau11 );
1530 // FLA_Copy( a21, u21p );
1532 alpha11,
1533 a21, rs_A,
1534 tau11 );
1536 m_ahead,
1537 a21, rs_A,
1538 u21p, inc_up );
1539
1540 if ( n_ahead > 0 )
1541 {
1542 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1543 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1545
1546 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1547 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1549 n_ahead,
1550 a12t, cs_A,
1551 a12p, inc_ap );
1553 n_ahead,
1555 a12t, cs_A,
1556 a12p, inc_ap );
1557 }
1558
1559 if ( m_behind > 0 )
1560 {
1561 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1562 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1565 m_ahead,
1566 n_ahead,
1567 buff_m1,
1568 u21, inc_u,
1569 y21, inc_y,
1570 A22, rs_A, cs_A );
1573 m_ahead,
1574 n_ahead,
1575 buff_m1,
1576 z21, inc_z,
1577 v21, inc_v,
1578 A22, rs_A, cs_A );
1579 }
1580
1581 if ( n_ahead > 0 )
1582 {
1583 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1586 m_ahead,
1587 n_ahead,
1588 buff_1,
1589 A22, rs_A, cs_A,
1590 u21p, inc_up,
1591 buff_0,
1592 y21, inc_y );
1593
1594 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1596 n_ahead,
1598 y21, inc_y,
1599 a12p, inc_ap );
1600
1601 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1604 m_ahead,
1605 n_ahead,
1606 buff_1,
1607 A22, rs_A, cs_A,
1608 a12p, inc_ap,
1609 buff_0,
1610 w21, inc_w );
1611
1612 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1614 n_ahead,
1615 buff_1,
1616 a12t, cs_A,
1617 y21, inc_y );
1618
1619 // FLA_Househ2s_UT( FLA_RIGHT,
1620 // a12p_t,
1621 // a12p_b,
1622 // alpha12, psi11_minus_alpha12, sigma11 );
1624 a12p_t,
1625 a12p_b, inc_ap,
1626 &alpha12,
1628 sigma11 );
1629
1630 // FLA_Copy( a12p, v21 );
1631 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1632 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1633 // FLA_Conjugate( v21_b );
1635 n_ahead,
1636 a12p, inc_ap,
1637 v21, inc_v );
1640 n_ahead,
1642 v21, inc_v );
1643 bl1_zconjv( n_ahead - 1,
1644 v21_b, inc_v );
1645
1646 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1647 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1648 *a12t_l = alpha12;
1650 n_ahead - 1,
1651 v21_b, inc_v,
1652 a12t_r, cs_A );
1653 }
1654
1655 // FLA_Copy( u21p, u21 );
1657 m_ahead,
1658 u21p, inc_up,
1659 u21, inc_u );
1660
1661 if ( n_ahead > 0 )
1662 {
1663 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1664 // FLA_Scal( FLA_MINUS_ONE, beta );
1665 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1667 n_ahead,
1668 y21, inc_y,
1669 v21, inc_v,
1670 &beta );
1672
1673 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1674 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1677
1678 // FLA_Copy( w21, z21 );
1679 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1680 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1681 // FLA_Axpy( beta, u21, z21 );
1683 m_ahead,
1684 w21, inc_w,
1685 z21, inc_z );
1687 m_ahead,
1689 A22_l, rs_A,
1690 z21, inc_z );
1692 m_ahead,
1694 z21, inc_z );
1696 m_ahead,
1697 &beta,
1698 u21, inc_u,
1699 z21, inc_z );
1700
1701 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1702 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1704 n_ahead,
1705 tau11,
1706 y21, inc_y );
1708 m_ahead,
1709 sigma11,
1710 z21, inc_z );
1711
1712 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1715 m_behind,
1716 n_ahead,
1717 buff_1,
1718 A02, rs_A, cs_A,
1719 v21, inc_v,
1720 buff_0,
1721 s01, rs_S );
1722 }
1723
1724 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1725 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1727 n_behind,
1728 a10t, cs_A,
1729 t01, rs_T );
1732 m_ahead,
1733 n_behind,
1734 buff_1,
1735 A20, rs_A, cs_A,
1736 u21, inc_u,
1737 buff_1,
1738 t01, rs_T );
1739
1740 if ( m_behind + 1 == b_alg && n_ahead > 0 )
1741 {
1742 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1743 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1746 m_ahead,
1747 n_ahead,
1748 buff_m1,
1749 u21, inc_u,
1750 y21, inc_y,
1751 A22, rs_A, cs_A );
1754 m_ahead,
1755 n_ahead,
1756 buff_m1,
1757 z21, inc_z,
1758 v21, inc_v,
1759 A22, rs_A, cs_A );
1760 }
1761
1762 /*------------------------------------------------------------*/
1763
1764 }
1765
1766 // FLA_Obj_free( &w );
1767 // FLA_Obj_free( &ap );
1768 // FLA_Obj_free( &u );
1769 // FLA_Obj_free( &up );
1770 // FLA_Obj_free( &v );
1771 // FLA_Obj_free( &y );
1772 // FLA_Obj_free( &z );
1773 FLA_free( buff_w );
1774 FLA_free( buff_ap );
1775 FLA_free( buff_u );
1776 FLA_free( buff_up );
1777 FLA_free( buff_v );
1778 FLA_free( buff_y );
1779 FLA_free( buff_z );
1780
1781 return FLA_SUCCESS;
1782}

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), bl1_zscals(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var3().

◆ FLA_Bidiag_UT_u_step_opz_var4()

FLA_Error FLA_Bidiag_UT_u_step_opz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *buff_T,
int  rs_T,
int  cs_T,
dcomplex *buff_S,
int  rs_S,
int  cs_S 
)
1807{
1811
1816 dcomplex beta;
1818 int i;
1819
1820 // b_alg = FLA_Obj_length( T );
1821 int b_alg = m_TS;
1822
1823 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1824 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1825 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1826 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1827 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1828 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1829 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1830 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1831 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1832 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1833 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1834 dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1835 dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1836 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1837 dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1838 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1839 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1840 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1841 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1842 dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1843 int inc_w = 1;
1844 int inc_al = 1;
1845 int inc_ap = 1;
1846 int inc_u = 1;
1847 int inc_up = 1;
1848 int inc_v = 1;
1849 int inc_d = 1;
1850 int inc_e = 1;
1851 int inc_f = 1;
1852 int inc_g = 1;
1853
1854 // FLA_Set( FLA_ZERO, Y );
1855 // FLA_Set( FLA_ZERO, Z );
1856 bl1_zsetm( n_A,
1857 b_alg,
1858 buff_0,
1859 buff_Y, rs_Y, cs_Y );
1860 bl1_zsetm( m_A,
1861 b_alg,
1862 buff_0,
1863 buff_Z, rs_Z, cs_Z );
1864
1865 for ( i = 0; i < b_alg; ++i )
1866 {
1867 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1868 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1869 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1870 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1871 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1872 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1873 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1874 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1875
1876 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1877 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1878 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1879
1880 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1881 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1882 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1883
1884 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1885 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1886
1887 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1888 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1889
1890 dcomplex* w21 = buff_w + (i+1)*inc_w;
1891
1892 dcomplex* a22l = buff_al + (i+1)*inc_al;
1893
1894 dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1895
1896 dcomplex* u21 = buff_u + (i+1)*inc_u;
1897
1898 dcomplex* u21p = buff_up + (i+1)*inc_up;
1899
1900 dcomplex* v21 = buff_v + (i+1)*inc_v;
1901
1902 dcomplex* d0 = buff_d + (0 )*inc_d;
1903
1904 dcomplex* e0 = buff_e + (0 )*inc_e;
1905
1906 dcomplex* f0 = buff_f + (0 )*inc_f;
1907
1908 dcomplex* g0 = buff_g + (0 )*inc_g;
1909
1910 dcomplex* a12p_t = a12p + (0 )*inc_ap;
1911 dcomplex* a12p_b = a12p + (1 )*inc_ap;
1912
1913 dcomplex* v21_t = v21 + (0 )*inc_v;
1914 dcomplex* v21_b = v21 + (1 )*inc_v;
1915
1916 dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1917
1918 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1919 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1920
1921 dcomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1922
1923 dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1924
1925 dcomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1926
1927 dcomplex* ABL = a10t;
1928 dcomplex* ZBL = z10t;
1929
1930 dcomplex* a2 = alpha11;
1931
1932 int m_ahead = m_A - i - 1;
1933 int n_ahead = n_A - i - 1;
1934 int m_behind = i;
1935 int n_behind = i;
1936
1937 /*------------------------------------------------------------*/
1938
1939 if ( m_behind > 0 )
1940 {
1941 // FLA_Copy( a01_b, last_elem );
1942 // FLA_Set( FLA_ONE, a01_b );
1943 last_elem = *a01_b;
1944 *a01_b = *buff_1;
1945 }
1946
1947 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1948 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1951 m_ahead + 1,
1952 n_behind,
1953 buff_m1,
1954 ABL, rs_A, cs_A,
1955 y10t, cs_Y,
1956 buff_1,
1957 a2, rs_A );
1960 m_ahead + 1,
1961 n_behind,
1962 buff_m1,
1963 ZBL, rs_Z, cs_Z,
1964 a01, rs_A,
1965 buff_1,
1966 a2, rs_A );
1967
1968 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1969 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1972 n_ahead,
1973 n_behind,
1974 buff_m1,
1975 Y20, rs_Y, cs_Y,
1976 a10t, cs_A,
1977 buff_1,
1978 a12t, cs_A );
1981 m_behind,
1982 n_ahead,
1983 buff_m1,
1984 A02, rs_A, cs_A,
1985 z10t, cs_Z,
1986 buff_1,
1987 a12t, cs_A );
1988
1989 if ( m_behind > 0 )
1990 {
1991 // FLA_Copy( last_elem, a01_b );
1992 *a01_b = last_elem;
1993 }
1994
1995 // FLA_Househ2_UT( FLA_LEFT,
1996 // alpha11,
1997 // a21, tau11 );
1998 // FLA_Copy( a21, u21p );
2000 alpha11,
2001 a21, rs_A,
2002 tau11 );
2004 m_ahead,
2005 a21, rs_A,
2006 u21p, inc_up );
2007
2008 if ( n_ahead > 0 )
2009 {
2010 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
2011 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
2013
2014 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
2015 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
2017 n_ahead,
2018 a12t, cs_A,
2019 a12p, inc_ap );
2021 n_ahead,
2023 a12t, cs_A,
2024 a12p, inc_ap );
2025
2026 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
2027 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
2030 m_ahead,
2031 n_behind,
2032 buff_1,
2033 A20, rs_A, cs_A,
2034 u21p, inc_up,
2035 buff_0,
2036 d0, inc_d );
2039 m_ahead,
2040 n_behind,
2041 buff_1,
2042 Z20, rs_Z, cs_Z,
2043 u21p, inc_up,
2044 buff_0,
2045 e0, inc_e );
2046
2047 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2048 // FLA_Axpy( FLA_ONE, d0, t01 );
2050 n_behind,
2051 a10t, cs_A,
2052 t01, rs_T );
2054 n_behind,
2055 buff_1,
2056 d0, inc_d,
2057 t01, rs_T );
2058
2059 // FLA_Set( FLA_ZERO, y21 );
2060 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
2061 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
2063 buff_0,
2064 y21, rs_Y );
2067 n_ahead,
2068 n_behind,
2069 buff_m1,
2070 Y20, rs_Y, cs_Y,
2071 d0, inc_d,
2072 buff_1,
2073 y21, rs_Y );
2076 m_behind,
2077 n_ahead,
2078 buff_m1,
2079 A02, rs_A, cs_A,
2080 e0, inc_e,
2081 buff_1,
2082 y21, rs_Y );
2083
2084 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
2087 m_ahead,
2088 n_ahead,
2089 buff_1,
2090 A22, rs_A, cs_A,
2091 u21p, inc_up,
2092 buff_1,
2093 y21, rs_Y );
2094
2095 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
2097 n_ahead,
2099 y21, rs_Y,
2100 a12p, inc_ap );
2101
2102 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
2105 m_ahead,
2106 n_ahead,
2107 buff_1,
2108 A22, rs_A, cs_A,
2109 a12p, inc_ap,
2110 buff_0,
2111 w21, inc_w );
2112
2113 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
2114 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
2117 n_ahead,
2118 n_behind,
2119 buff_1,
2120 Y20, rs_Y, cs_Y,
2121 a12p, inc_ap,
2122 buff_0,
2123 f0, inc_f );
2126 m_behind,
2127 n_ahead,
2128 buff_1,
2129 A02, rs_A, cs_A,
2130 a12p, inc_ap,
2131 buff_0,
2132 g0, inc_g );
2133
2134 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
2135 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
2138 m_ahead,
2139 n_behind,
2140 buff_m1,
2141 A20, rs_A, cs_A,
2142 f0, inc_f,
2143 buff_1,
2144 w21, inc_w );
2147 m_ahead,
2148 n_behind,
2149 buff_m1,
2150 Z20, rs_Z, cs_Z,
2151 g0, inc_g,
2152 buff_1,
2153 w21, inc_w );
2154
2155 // FLA_Copy( A22_l, a22l );
2156 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
2157 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
2159 m_ahead,
2160 A22_l, rs_A,
2161 a22l, inc_al );
2164 m_ahead,
2165 n_behind,
2166 buff_m1,
2167 A20, rs_A, cs_A,
2168 Y20_t, cs_Y,
2169 buff_1,
2170 a22l, inc_al );
2173 m_ahead,
2174 n_behind,
2175 buff_m1,
2176 Z20, rs_Z, cs_Z,
2177 A02_l, rs_A,
2178 buff_1,
2179 a22l, inc_al );
2180
2181 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
2183 n_ahead,
2184 buff_1,
2185 a12t, cs_A,
2186 y21, rs_Y );
2187
2188 // FLA_Househ2s_UT( FLA_RIGHT,
2189 // a12p_t,
2190 // a12p_b,
2191 // alpha12, psi11_minus_alpha12, sigma11 );
2193 a12p_t,
2194 a12p_b, inc_ap,
2195 &alpha12,
2197 sigma11 );
2198
2199 // FLA_Copy( a12p, v21 );
2200 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
2201 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
2202 // FLA_Conjugate( v21_b );
2204 n_ahead,
2205 a12p, inc_ap,
2206 v21, inc_v );
2209 n_ahead,
2211 v21, inc_v );
2212 bl1_zconjv( n_ahead - 1,
2213 v21_b, inc_v );
2214
2215 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
2216 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
2219
2220 // FLA_Copy( g0, s01 );
2221 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
2222 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
2224 n_behind,
2225 g0, inc_g,
2226 s01, rs_S );
2228 n_behind,
2230 A02_l, rs_A,
2231 s01, rs_S );
2233 n_behind,
2235 s01, rs_S );
2236
2237 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
2238 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
2239 *a12t_l = alpha12;
2241 n_ahead - 1,
2242 v21_b, inc_v,
2243 a12t_r, cs_A );
2244 }
2245
2246 // FLA_Copy( u21p, u21 );
2248 m_ahead,
2249 u21p, inc_up,
2250 u21, inc_u );
2251
2252 if ( n_ahead > 0 )
2253 {
2254 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
2255 // FLA_Scal( FLA_MINUS_ONE, beta );
2256 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
2258 n_ahead,
2259 y21, rs_Y,
2260 v21, inc_v,
2261 &beta );
2263
2264 // FLA_Copy( w21, z21 );
2265 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
2266 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
2267 // FLA_Axpy( beta, u21, z21 );
2269 m_ahead,
2270 w21, inc_w,
2271 z21, rs_Z );
2273 m_ahead,
2275 a22l, inc_al,
2276 z21, rs_Z );
2278 m_ahead,
2280 z21, rs_Z );
2282 m_ahead,
2283 &beta,
2284 u21, inc_u,
2285 z21, rs_Z );
2286
2287 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
2288 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
2290 n_ahead,
2291 tau11,
2292 y21, rs_Y );
2294 m_ahead,
2295 sigma11,
2296 z21, rs_Z );
2297 }
2298 else // if ( n_ahead == 0 )
2299 {
2300 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2301 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
2303 n_behind,
2304 a10t, cs_A,
2305 t01, rs_T );
2308 m_ahead,
2309 n_behind,
2310 buff_1,
2311 A20, rs_A, cs_A,
2312 u21, inc_u,
2313 buff_1,
2314 t01, rs_T );
2315 }
2316
2317 /*------------------------------------------------------------*/
2318
2319 }
2320
2321 // FLA_Obj_free( &w );
2322 // FLA_Obj_free( &al );
2323 // FLA_Obj_free( &ap );
2324 // FLA_Obj_free( &u );
2325 // FLA_Obj_free( &up );
2326 // FLA_Obj_free( &v );
2327 // FLA_Obj_free( &d );
2328 // FLA_Obj_free( &e );
2329 // FLA_Obj_free( &f );
2330 // FLA_Obj_free( &g );
2331 FLA_free( buff_w );
2332 FLA_free( buff_al );
2333 FLA_free( buff_ap );
2334 FLA_free( buff_u );
2335 FLA_free( buff_up );
2336 FLA_free( buff_v );
2337 FLA_free( buff_d );
2338 FLA_free( buff_e );
2339 FLA_free( buff_f );
2340 FLA_free( buff_g );
2341
2342 return FLA_SUCCESS;
2343}

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_opz_var5()

FLA_Error FLA_Bidiag_UT_u_step_opz_var5 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *buff_T,
int  rs_T,
int  cs_T,
dcomplex *buff_S,
int  rs_S,
int  cs_S 
)
1348{
1352
1353 dcomplex beta;
1355 int i;
1356
1357 // b_alg = FLA_Obj_length( T );
1358 int b_alg = m_TS;
1359
1360 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1361 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1362 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1363 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1364 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1365 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1366 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1367 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1368 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1369 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1370 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1371 dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1372 int inc_u = 1;
1373 int inc_v = 1;
1374 int inc_d = 1;
1375 int inc_e = 1;
1376 int inc_f = 1;
1377 int inc_g = 1;
1378
1379 // FLA_Set( FLA_ZERO, Y );
1380 // FLA_Set( FLA_ZERO, Z );
1381 bl1_zsetm( n_A,
1382 b_alg,
1383 buff_0,
1384 buff_Y, rs_Y, cs_Y );
1385 bl1_zsetm( m_A,
1386 b_alg,
1387 buff_0,
1388 buff_Z, rs_Z, cs_Z );
1389
1390 for ( i = 0; i < b_alg; ++i )
1391 {
1392 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1393 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1394 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1395 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1396 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1397 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1398 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1399 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1400
1401 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1402 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1403 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1404
1405 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1406 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1407 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1408
1409 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1410 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1411
1412 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1413 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1414
1415 dcomplex* u21 = buff_u + (i+1)*inc_u;
1416
1417 dcomplex* v21 = buff_v + (i+1)*inc_v;
1418
1419 dcomplex* d0 = buff_d + (0 )*inc_d;
1420
1421 dcomplex* e0 = buff_e + (0 )*inc_e;
1422
1423 dcomplex* f0 = buff_f + (0 )*inc_f;
1424
1425 dcomplex* g0 = buff_g + (0 )*inc_g;
1426
1427 dcomplex* v21_t = v21 + (0 )*inc_v;
1428 dcomplex* v21_b = v21 + (1 )*inc_v;
1429
1430 dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1431
1432 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1433 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1434
1435 dcomplex* ABL = a10t;
1436 dcomplex* ZBL = z10t;
1437
1438 dcomplex* a2 = alpha11;
1439
1440 int m_ahead = m_A - i - 1;
1441 int n_ahead = n_A - i - 1;
1442 int m_behind = i;
1443 int n_behind = i;
1444
1445 /*------------------------------------------------------------*/
1446
1447 if ( m_behind > 0 )
1448 {
1449 // FLA_Copy( a01_b, last_elem );
1450 // FLA_Set( FLA_ONE, a01_b );
1451 last_elem = *a01_b;
1452 *a01_b = *buff_1;
1453 }
1454
1455 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1456 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1459 m_ahead + 1,
1460 n_behind,
1461 buff_m1,
1462 ABL, rs_A, cs_A,
1463 y10t, cs_Y,
1464 buff_1,
1465 a2, rs_A );
1468 m_ahead + 1,
1469 n_behind,
1470 buff_m1,
1471 ZBL, rs_Z, cs_Z,
1472 a01, rs_A,
1473 buff_1,
1474 a2, rs_A );
1475
1476 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1477 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1480 n_ahead,
1481 n_behind,
1482 buff_m1,
1483 Y20, rs_Y, cs_Y,
1484 a10t, cs_A,
1485 buff_1,
1486 a12t, cs_A );
1489 m_behind,
1490 n_ahead,
1491 buff_m1,
1492 A02, rs_A, cs_A,
1493 z10t, cs_Z,
1494 buff_1,
1495 a12t, cs_A );
1496
1497 if ( m_behind > 0 )
1498 {
1499 // FLA_Copy( last_elem, a01_b );
1500 *a01_b = last_elem;
1501 }
1502
1503 // FLA_Househ2_UT( FLA_LEFT,
1504 // alpha11,
1505 // a21, tau11 );
1506 // FLA_Copy( a21, u21 );
1508 alpha11,
1509 a21, rs_A,
1510 tau11 );
1512 m_ahead,
1513 a21, rs_A,
1514 u21, inc_u );
1515
1516 if ( n_ahead > 0 )
1517 {
1518 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
1519 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21, FLA_ONE, y21 );
1521 n_ahead,
1522 a12t, cs_A,
1523 y21, rs_Y );
1526 m_ahead,
1527 n_ahead,
1528 buff_1,
1529 A22, rs_A, cs_A,
1530 u21, inc_u,
1531 buff_1,
1532 y21, rs_Y );
1533
1534 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ZERO, d0 );
1535 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21, FLA_ZERO, e0 );
1538 m_ahead,
1539 n_behind,
1540 buff_1,
1541 A20, rs_A, cs_A,
1542 u21, inc_u,
1543 buff_0,
1544 d0, inc_d );
1547 m_ahead,
1548 n_behind,
1549 buff_1,
1550 Z20, rs_Z, cs_Z,
1551 u21, inc_u,
1552 buff_0,
1553 e0, inc_e );
1554
1555 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1556 // FLA_Axpy( FLA_ONE, d0, t01 );
1558 n_behind,
1559 a10t, cs_A,
1560 t01, rs_T );
1562 n_behind,
1563 buff_1,
1564 d0, inc_d,
1565 t01, rs_T );
1566
1567 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1568 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1571 n_ahead,
1572 n_behind,
1573 buff_m1,
1574 Y20, rs_Y, cs_Y,
1575 d0, inc_d,
1576 buff_1,
1577 y21, rs_Y );
1580 m_behind,
1581 n_ahead,
1582 buff_m1,
1583 A02, rs_A, cs_A,
1584 e0, inc_e,
1585 buff_1,
1586 y21, rs_Y );
1587
1588 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1590 n_ahead,
1591 tau11,
1592 y21, rs_Y );
1593
1594 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
1596 n_ahead,
1597 buff_m1,
1598 y21, rs_Y,
1599 a12t, cs_A );
1600
1601 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
1603 a12t_l,
1604 a12t_r, cs_A,
1605 sigma11 );
1606
1607 // FLA_Set( FLA_ONE, v21_t );
1608 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
1609 *v21_t = *buff_1;
1611 n_ahead - 1,
1612 a12t_r, cs_A,
1613 v21_b, inc_v );
1614
1615 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1616 // FLA_Scal( FLA_MINUS_ONE, beta );
1618 n_ahead,
1619 y21, rs_Y,
1620 v21, inc_v,
1621 &beta );
1622 bl1_zscals( buff_m1, &beta );
1623
1624 // FLA_Copy( u21, z21 );
1625 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, v21, beta, z21 );
1627 m_ahead,
1628 u21, inc_u,
1629 z21, rs_Z );
1632 m_ahead,
1633 n_ahead,
1634 buff_1,
1635 A22, rs_A, cs_A,
1636 v21, inc_v,
1637 &beta,
1638 z21, rs_Z );
1639
1640 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, v21, FLA_ZERO, f0 );
1641 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, g0 );
1644 n_ahead,
1645 m_behind,
1646 buff_1,
1647 Y20, rs_Y, cs_Y,
1648 v21, inc_v,
1649 buff_0,
1650 f0, inc_f );
1653 m_behind,
1654 n_ahead,
1655 buff_1,
1656 A02, rs_A, cs_A,
1657 v21, inc_v,
1658 buff_0,
1659 g0, inc_g );
1660
1661 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, z21 );
1662 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, z21 );
1665 m_ahead,
1666 n_behind,
1667 buff_m1,
1668 A20, rs_A, cs_A,
1669 f0, inc_f,
1670 buff_1,
1671 z21, rs_Z );
1674 m_ahead,
1675 n_behind,
1676 buff_m1,
1677 Z20, rs_Z, cs_Z,
1678 g0, inc_g,
1679 buff_1,
1680 z21, rs_Z );
1681
1682 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1684 m_ahead,
1685 sigma11,
1686 z21, rs_Z );
1687
1688 // FLA_Copy( g0, s01 );
1690 n_behind,
1691 g0, inc_g,
1692 s01, rs_S );
1693 }
1694 else // if ( n_ahead == 0 )
1695 {
1696 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1697 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1699 n_behind,
1700 a10t, cs_A,
1701 t01, rs_T );
1704 m_ahead,
1705 n_behind,
1706 buff_1,
1707 A20, rs_A, cs_A,
1708 u21, inc_u,
1709 buff_1,
1710 t01, rs_T );
1711 }
1712
1713 /*------------------------------------------------------------*/
1714
1715 }
1716
1717 // FLA_Obj_free( &u );
1718 // FLA_Obj_free( &v );
1719 // FLA_Obj_free( &d );
1720 // FLA_Obj_free( &e );
1721 // FLA_Obj_free( &f );
1722 // FLA_Obj_free( &g );
1723 FLA_free( buff_u );
1724 FLA_free( buff_v );
1725 FLA_free( buff_d );
1726 FLA_free( buff_e );
1727 FLA_free( buff_f );
1728 FLA_free( buff_g );
1729
1730 return FLA_SUCCESS;
1731}

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var5().

◆ FLA_Bidiag_UT_u_step_unb_var1()

FLA_Error FLA_Bidiag_UT_u_step_unb_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj TTL, TTR, T00, t01, T02,
24 TBL, TBR, t10t, tau11, t12t,
25 T20, t21, T22;
26 FLA_Obj STL, STR, S00, s01, S02,
28 S20, s21, S22;
29 FLA_Obj vT, v01,
30 vB, nu11,
31 v21;
32 FLA_Obj v;
33
37 v21_b;
38
40 dim_t n_A;
42
43
45
47 n_A = FLA_Obj_width( A );
48
49 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
50
51 FLA_Part_2x2( A, &ATL, &ATR,
52 &ABL, &ABR, 0, 0, FLA_TL );
53 FLA_Part_2x2( T, &TTL, &TTR,
54 &TBL, &TBR, 0, 0, FLA_TL );
55 FLA_Part_2x2( S, &STL, &STR,
56 &SBL, &SBR, 0, 0, FLA_TL );
57 FLA_Part_2x1( v, &vT,
58 &vB, 0, FLA_TOP );
59
60 while ( FLA_Obj_length( ATL ) < b_alg )
61 {
62 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
63 /* ************* */ /* ************************** */
64 &a10t, /**/ &alpha11, &a12t,
65 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
66 1, 1, FLA_BR );
67 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
68 /* ************* */ /* ************************** */
69 &t10t, /**/ &tau11, &t12t,
70 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
71 1, 1, FLA_BR );
72 FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
73 /* ************* */ /* ************************** */
74 &s10t, /**/ &sigma11, &s12t,
75 SBL, /**/ SBR, &S20, /**/ &s21, &S22,
76 1, 1, FLA_BR );
78 /* ** */ /* ***** */
79 &nu11,
80 vB, &v21, 1, FLA_BOTTOM );
81
82 /*------------------------------------------------------------*/
83
84 // [ alpha11_new, u21, tau11 ] = House2( alpha11, a21 );
86 alpha11,
87 a21, tau11 );
88
89 if ( FLA_Obj_width( A22 ) > 0 )
90 {
94 &v21_b, 1, FLA_TOP );
95
96 // Apply H from the left to a12t and A22.
98
99 // [ alpha12t, u12t_r, tau11 ] = House2( a12t_l, a12t_r );
101
102 // v21_t = 1;
103 // v21_b = a12t_r;
106
107 // Apply H from the right to A22.
109
110 // s01 = conj(V02) * v21;
112 }
113
114 // t01 = a10t' + U20' * u21;
117
118 /*------------------------------------------------------------*/
119
120 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
121 a10t, alpha11, /**/ a12t,
122 /* ************** */ /* ************************ */
123 &ABL, /**/ &ABR, A20, a21, /**/ A22,
124 FLA_TL );
125 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
126 t10t, tau11, /**/ t12t,
127 /* ************** */ /* ************************ */
128 &TBL, /**/ &TBR, T20, t21, /**/ T22,
129 FLA_TL );
130 FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
131 s10t, sigma11, /**/ s12t,
132 /* ************** */ /* ************************ */
133 &SBL, /**/ &SBR, S20, s21, /**/ S22,
134 FLA_TL );
136 nu11,
137 /* ** */ /* ***** */
138 &vB, v21, FLA_TOP );
139 }
140
141 FLA_Obj_free( &v );
142
143 return FLA_SUCCESS;
144}
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemv.c:15
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition FLA_Apply_H2_UT.c:13
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition FLA_Househ2_UT.c:16

References FLA_Apply_H2_UT(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Gemv(), FLA_Househ2_UT(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_unb_var1().

◆ FLA_Bidiag_UT_u_step_unb_var2()

FLA_Error FLA_Bidiag_UT_u_step_unb_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj TTL, TTR, T00, t01, T02,
24 TBL, TBR, t10t, tau11, t12t,
25 T20, t21, T22;
26 FLA_Obj STL, STR, S00, s01, S02,
28 S20, s21, S22;
29 FLA_Obj yT, y01,
30 yB, psi11,
31 y21;
32 FLA_Obj zT, z01,
33 zB, zeta11,
34 z21;
35 FLA_Obj vT, v01,
36 vB, nu11,
37 v21;
38 FLA_Obj v, y, z;
39
41
44 v21_b;
45
47 dim_t m_A, n_A;
49
50
52
54 m_A = FLA_Obj_length( A );
55 n_A = FLA_Obj_width( A );
56
57 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
58 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
59 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
60 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
61
62 FLA_Part_2x2( A, &ATL, &ATR,
63 &ABL, &ABR, 0, 0, FLA_TL );
64 FLA_Part_2x2( T, &TTL, &TTR,
65 &TBL, &TBR, 0, 0, FLA_TL );
66 FLA_Part_2x2( S, &STL, &STR,
67 &SBL, &SBR, 0, 0, FLA_TL );
68 FLA_Part_2x1( v, &vT,
69 &vB, 0, FLA_TOP );
70 FLA_Part_2x1( y, &yT,
71 &yB, 0, FLA_TOP );
72 FLA_Part_2x1( z, &zT,
73 &zB, 0, FLA_TOP );
74
75 while ( FLA_Obj_length( ATL ) < b_alg )
76 {
77 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
78 /* ************* */ /* ************************** */
79 &a10t, /**/ &alpha11, &a12t,
80 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
81 1, 1, FLA_BR );
82 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
83 /* ************* */ /* ************************** */
84 &t10t, /**/ &tau11, &t12t,
85 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
86 1, 1, FLA_BR );
87 FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
88 /* ************* */ /* ************************** */
89 &s10t, /**/ &sigma11, &s12t,
90 SBL, /**/ SBR, &S20, /**/ &s21, &S22,
91 1, 1, FLA_BR );
93 /* ** */ /* ***** */
94 &nu11,
95 vB, &v21, 1, FLA_BOTTOM );
97 /* ** */ /* ***** */
98 &psi11,
99 yB, &y21, 1, FLA_BOTTOM );
101 /* ** */ /* ***** */
102 &zeta11,
103 zB, &z21, 1, FLA_BOTTOM );
104
105 /*------------------------------------------------------------*/
106
107 // [ alpha11_new, u21, tau11 ] = House2( alpha11, a21 );
109 alpha11,
110 a21, tau11 );
111
112 if ( FLA_Obj_width( A22 ) > 0 )
113 {
114 // y21' = a12t + u21' * A22;
115 // y21 = conj(a12t) + A22' * u21;
118
119 // y21 = y21 / tau11;
121
122 // a12t = a12t - conj(y21)^T;
124
127 &v21_b, 1, FLA_TOP );
128
129 // [ a12t_l, v12t_b, sigma11 ] = House2( a12t_l, a12t_r );
131
132 // v21_t = 1;
133 // v21_b = a12t_r^T;
136
137 // beta = - y21' * v21;
140
141 // z21 = ( A22 - u21 * y21' ) * v21 / sigma11;
142 // = ( A22 * v21 - u21 * y21' * v21 ) / sigma11;
143 // = ( A22 * v21 + beta * u21 ) / sigma11;
144 FLA_Copy( a21, z21 );
147
148 // A22 = A22 - u21 * y21' - z21 * v21';
151
152 // s01 = conj(V02) * v21;
154 }
155
156 // t01 = a10t' + U20' * u21;
159
160 /*------------------------------------------------------------*/
161
162 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
163 a10t, alpha11, /**/ a12t,
164 /* ************** */ /* ************************ */
165 &ABL, /**/ &ABR, A20, a21, /**/ A22,
166 FLA_TL );
167 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
168 t10t, tau11, /**/ t12t,
169 /* ************** */ /* ************************ */
170 &TBL, /**/ &TBR, T20, t21, /**/ T22,
171 FLA_TL );
172 FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
173 s10t, sigma11, /**/ s12t,
174 /* ************** */ /* ************************ */
175 &SBL, /**/ &SBR, S20, s21, /**/ S22,
176 FLA_TL );
178 nu11,
179 /* ** */ /* ***** */
180 &vB, v21, FLA_TOP );
182 psi11,
183 /* ** */ /* ***** */
184 &yB, y21, FLA_TOP );
186 zeta11,
187 /* ** */ /* ***** */
188 &zB, z21, FLA_TOP );
189 }
190
191 FLA_Obj_free( &beta );
192 FLA_Obj_free( &v );
193 FLA_Obj_free( &y );
194 FLA_Obj_free( &z );
195
196 return FLA_SUCCESS;
197}
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Scal.c:15
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition FLA_Dotc.c:13
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Axpyt.c:15
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition FLA_Inv_scalc.c:13
FLA_Error FLA_Gerc(FLA_Conj conjx, FLA_Conj conjy, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition FLA_Gerc.c:13
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemvc.c:13

References FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_unb_var2().

◆ FLA_Bidiag_UT_u_step_unb_var3()

FLA_Error FLA_Bidiag_UT_u_step_unb_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj TTL, TTR, T00, t01, T02,
24 TBL, TBR, t10t, tau11, t12t,
25 T20, t21, T22;
26 FLA_Obj STL, STR, S00, s01, S02,
28 S20, s21, S22;
29 FLA_Obj wT, w01,
30 wB, omega11,
31 w21;
34 a12p;
35 FLA_Obj uT, u01,
37 u21;
40 u21p;
41 FLA_Obj vT, v01,
42 vB, nu11,
43 v21;
44 FLA_Obj yT, y01,
45 yB, psi11,
46 y21;
47 FLA_Obj zT, z01,
48 zB, zeta11,
49 z21;
50 FLA_Obj w, ap, u, up, v, y, z;
51
61
64 a12p_b;
67 v21_b;
68
70 dim_t m_A, n_A;
72
73
75
77 m_A = FLA_Obj_length( A );
78 n_A = FLA_Obj_width( A );
79
81 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
82 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &alpha12 );
88 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
89 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
90 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
91 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
92 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
93 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
94 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
95 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
96
97 FLA_Part_2x2( A, &ATL, &ATR,
98 &ABL, &ABR, 0, 0, FLA_TL );
99 FLA_Part_2x2( T, &TTL, &TTR,
100 &TBL, &TBR, 0, 0, FLA_TL );
101 FLA_Part_2x2( S, &STL, &STR,
102 &SBL, &SBR, 0, 0, FLA_TL );
103 FLA_Part_2x1( w, &wT,
104 &wB, 0, FLA_TOP );
105 FLA_Part_2x1( ap, &apT,
106 &apB, 0, FLA_TOP );
107 FLA_Part_2x1( u, &uT,
108 &uB, 0, FLA_TOP );
109 FLA_Part_2x1( up, &uTp,
110 &uBp, 0, FLA_TOP );
111 FLA_Part_2x1( v, &vT,
112 &vB, 0, FLA_TOP );
113 FLA_Part_2x1( y, &yT,
114 &yB, 0, FLA_TOP );
115 FLA_Part_2x1( z, &zT,
116 &zB, 0, FLA_TOP );
117
118 while ( FLA_Obj_length( ATL ) < b_alg )
119 {
120 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
121 /* ************* */ /* ************************** */
122 &a10t, /**/ &alpha11, &a12t,
123 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
124 1, 1, FLA_BR );
125 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
126 /* ************* */ /* ************************** */
127 &t10t, /**/ &tau11, &t12t,
128 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
129 1, 1, FLA_BR );
130 FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
131 /* ************* */ /* ************************** */
132 &s10t, /**/ &sigma11, &s12t,
133 SBL, /**/ SBR, &S20, /**/ &s21, &S22,
134 1, 1, FLA_BR );
136 /* ** */ /* ***** */
137 &omega11,
138 wB, &w21, 1, FLA_BOTTOM );
140 /* ** */ /* ***** */
141 &alpha11p,
142 apB, &a12p, 1, FLA_BOTTOM );
144 /* ** */ /* ***** */
145 &upsilon11,
146 uB, &u21, 1, FLA_BOTTOM );
148 /* ** */ /* ***** */
149 &upsilon11p,
150 uBp, &u21p, 1, FLA_BOTTOM );
152 /* ** */ /* ***** */
153 &nu11,
154 vB, &v21, 1, FLA_BOTTOM );
156 /* ** */ /* ***** */
157 &psi11,
158 yB, &y21, 1, FLA_BOTTOM );
160 /* ** */ /* ***** */
161 &zeta11,
162 zB, &z21, 1, FLA_BOTTOM );
163
164 /*------------------------------------------------------------*/
165
166 if ( FLA_Obj_length( ATL ) > 0 )
167 {
170
173
176
179
180 // alpha11 = alpha11 - upsilon11 * conj(psi11) - zeta11 * conj(nu11);
183
184 // a21 = a21 - u21 * conj(psi11) - z21 * conj(nu11);
187
188 // a12t = a12t - upsilon11 * y21' - zeta11 * v21';
191 }
192
193 // [ alpha11, u21p, tau11 ] = House2( alpha11, a21 );
195 alpha11,
196 a21, tau11 );
197 FLA_Copy( a21, u21p );
198
199 if ( FLA_Obj_width( A22 ) > 0 )
200 {
201 // minus_inv_tau11 = - 1 / tau11;
204
205 // a12p = ( tau11 - 1 ) * a12t^T / tau11;
206 // = a12t^T - ( 1 / tau11 ) * a12t^T;
209 }
210
211 if ( FLA_Obj_length( ATL ) > 0 )
212 {
213 // A22 = A22 - u21 * y21' - z21 * v21';
216 }
217
218 if ( FLA_Obj_width( A22 ) > 0 )
219 {
220 // y21 = A22' * u21p;
222
223 // a12p = a12p - conj(y21) / tau11;
225
226 // w21 = A22 * conj(a12p);
228
229 // y21 = y21 + conj(a12t)^T;
231
234 &v21_b, 1, FLA_TOP );
236 &a12p_b, 1, FLA_TOP );
237
238 // [ alpha12, psi11_minus_alpha12, sigma11 ] = House2s( a12p_t, a12p_b );
240 a12p_t,
241 a12p_b,
243
244 // v21 = conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
245 FLA_Copy( a12p, v21 );
249
250 // a12t_l = alpha12;
251 // a12t_r = v21_b^T;
254 }
255
256 // u21 = u21p;
257 FLA_Copy( u21p, u21 );
258
259 if ( FLA_Obj_width( A22 ) > 0 )
260 {
261 // beta = - y21' * v21 / tau11;
265
267
268 // minus_conj_alpha12 = - conj(alpha12);
271
272 // z21 = ( w21 - conj(alpha12) * A22 * e0 ) / conj(psi11 - alpha12) + beta * u21;
273 FLA_Copy( w21, z21 );
276 FLA_Axpy( beta, u21, z21 );
277
278 // y21 = y21 / tau11;
279 // z21 = z21 / sigma11;
282
283 // s01 = conj(V02) * v21;
285 }
286
287 // t01 = a10t' + U20' * u21;
290
291 // Update A22 if this is the last iteration; this is needed when we're
292 // being called from the blocked routine so A22 is left in a valid state.
293 if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
294 FLA_Obj_width( A22 ) > 0 )
295 {
296 // A22 = A22 - u21 * y21' - z21 * v21';
299 }
300
301 /*------------------------------------------------------------*/
302
303 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
304 a10t, alpha11, /**/ a12t,
305 /* ************** */ /* ************************ */
306 &ABL, /**/ &ABR, A20, a21, /**/ A22,
307 FLA_TL );
308 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
309 t10t, tau11, /**/ t12t,
310 /* ************** */ /* ************************ */
311 &TBL, /**/ &TBR, T20, t21, /**/ T22,
312 FLA_TL );
313 FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
314 s10t, sigma11, /**/ s12t,
315 /* ************** */ /* ************************ */
316 &SBL, /**/ &SBR, S20, s21, /**/ S22,
317 FLA_TL );
319 omega11,
320 /* ** */ /* ***** */
321 &wB, w21, FLA_TOP );
323 alpha11p,
324 /* ** */ /* ***** */
325 &apB, a12p, FLA_TOP );
327 upsilon11,
328 /* ** */ /* ***** */
329 &uB, u21, FLA_TOP );
331 upsilon11p,
332 /* ** */ /* ***** */
333 &uBp, u21p, FLA_TOP );
335 nu11,
336 /* ** */ /* ***** */
337 &vB, v21, FLA_TOP );
339 psi11,
340 /* ** */ /* ***** */
341 &yB, y21, FLA_TOP );
343 zeta11,
344 /* ** */ /* ***** */
345 &zB, z21, FLA_TOP );
346 }
347
349 FLA_Obj_free( &beta );
357 FLA_Obj_free( &w );
358 FLA_Obj_free( &ap );
359 FLA_Obj_free( &u );
360 FLA_Obj_free( &up );
361 FLA_Obj_free( &v );
362 FLA_Obj_free( &y );
363 FLA_Obj_free( &z );
364
365 return FLA_SUCCESS;
366}
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Axpy.c:15
FLA_Error FLA_Mult_add(FLA_Obj alpha, FLA_Obj beta, FLA_Obj gamma)
Definition FLA_Mult_add.c:13
FLA_Error FLA_Conjugate(FLA_Obj A)
Definition FLA_Conjugate.c:13
FLA_Error FLA_Househ2s_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj alpha, FLA_Obj chi_1_minus_alpha, FLA_Obj tau)
Definition FLA_Househ2s_UT.c:16

References FLA_Axpy(), FLA_Axpyt(), FLA_Conjugate(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Househ2s_UT(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Mult_add(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_unb_var3().

◆ FLA_Bidiag_UT_u_step_unb_var4()

FLA_Error FLA_Bidiag_UT_u_step_unb_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  TU,
FLA_Obj  TV 
)
36{
37 FLA_Obj ATL, ATR, A00, a01, A02,
39 A20, a21, A22;
40 FLA_Obj YTL, YTR, Y00, y01, Y02,
41 YBL, YBR, y10t, psi11, y12t,
42 Y20, y21, Y22;
43 FLA_Obj ZTL, ZTR, Z00, z01, Z02,
45 Z20, z21, Z22;
46 FLA_Obj TTL, TTR, T00, t01, T02,
47 TBL, TBR, t10t, tau11, t12t,
48 T20, t21, T22;
49 FLA_Obj STL, STR, S00, s01, S02,
51 S20, s21, S22;
52 FLA_Obj wT, w01,
53 wB, omega11,
54 w21;
57 a22l;
60 a12p;
61 FLA_Obj uT, u01,
63 u21;
66 u21p;
67 FLA_Obj vT, v01,
68 vB, nu11,
69 v21;
70 FLA_Obj dT, d0,
71 dB, delta1,
72 d2;
73 FLA_Obj eT, e0,
74 eB, epsilon1,
75 e2;
76 FLA_Obj fT, f0,
77 fB, phi1,
78 f2;
79 FLA_Obj gT, g0,
80 gB, ghi1,
81 g2;
82 FLA_Obj w, al, ap, u, up, v;
83 FLA_Obj d, e, f, g;
84
96
98 a01_b;
102 a12p_b;
105 v21_b;
107 Y20_b;
108 FLA_Obj a2;
109
111 dim_t m_A, n_A;
112 dim_t b_alg;
113
114
116
118 m_A = FLA_Obj_length( A );
119 n_A = FLA_Obj_width( A );
120
122 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
123 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
124 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &alpha12 );
125 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_alpha12 );
131 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
132 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
133 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
134 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
135 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
136 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
137 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
138 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
139 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
140 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
141 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
142
143 FLA_Set( FLA_ZERO, Y );
144 FLA_Set( FLA_ZERO, Z );
145
146 FLA_Part_2x2( A, &ATL, &ATR,
147 &ABL, &ABR, 0, 0, FLA_TL );
148 FLA_Part_2x2( Y, &YTL, &YTR,
149 &YBL, &YBR, 0, 0, FLA_TL );
150 FLA_Part_2x2( Z, &ZTL, &ZTR,
151 &ZBL, &ZBR, 0, 0, FLA_TL );
152 FLA_Part_2x2( T, &TTL, &TTR,
153 &TBL, &TBR, 0, 0, FLA_TL );
154 FLA_Part_2x2( S, &STL, &STR,
155 &SBL, &SBR, 0, 0, FLA_TL );
156 FLA_Part_2x1( w, &wT,
157 &wB, 0, FLA_TOP );
158 FLA_Part_2x1( al, &alT,
159 &alB, 0, FLA_TOP );
160 FLA_Part_2x1( ap, &apT,
161 &apB, 0, FLA_TOP );
162 FLA_Part_2x1( u, &uT,
163 &uB, 0, FLA_TOP );
164 FLA_Part_2x1( up, &uTp,
165 &uBp, 0, FLA_TOP );
166 FLA_Part_2x1( v, &vT,
167 &vB, 0, FLA_TOP );
168 FLA_Part_2x1( d, &dT,
169 &dB, 0, FLA_TOP );
170 FLA_Part_2x1( e, &eT,
171 &eB, 0, FLA_TOP );
172 FLA_Part_2x1( f, &fT,
173 &fB, 0, FLA_TOP );
174 FLA_Part_2x1( g, &gT,
175 &gB, 0, FLA_TOP );
176
177 while ( FLA_Obj_length( ATL ) < b_alg )
178 {
179 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
180 /* ************* */ /* ************************** */
181 &a10t, /**/ &alpha11, &a12t,
182 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
183 1, 1, FLA_BR );
184 FLA_Repart_2x2_to_3x3( YTL, /**/ YTR, &Y00, /**/ &y01, &Y02,
185 /* ************* */ /* ************************ */
186 &y10t, /**/ &psi11, &y12t,
187 YBL, /**/ YBR, &Y20, /**/ &y21, &Y22,
188 1, 1, FLA_BR );
189 FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
190 /* ************* */ /* ************************* */
191 &z10t, /**/ &zeta11, &z12t,
192 ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
193 1, 1, FLA_BR );
194 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
195 /* ************* */ /* ************************** */
196 &t10t, /**/ &tau11, &t12t,
197 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
198 1, 1, FLA_BR );
199 FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
200 /* ************* */ /* ************************** */
201 &s10t, /**/ &sigma11, &s12t,
202 SBL, /**/ SBR, &S20, /**/ &s21, &S22,
203 1, 1, FLA_BR );
205 /* ** */ /* ***** */
206 &omega11,
207 wB, &w21, 1, FLA_BOTTOM );
209 /* ** */ /* ***** */
210 &alpha11l,
211 alB, &a22l, 1, FLA_BOTTOM );
213 /* ** */ /* ***** */
214 &alpha11p,
215 apB, &a12p, 1, FLA_BOTTOM );
217 /* ** */ /* ***** */
218 &upsilon11,
219 uB, &u21, 1, FLA_BOTTOM );
221 /* ** */ /* ***** */
222 &upsilon11p,
223 uBp, &u21p, 1, FLA_BOTTOM );
225 /* ** */ /* ***** */
226 &nu11,
227 vB, &v21, 1, FLA_BOTTOM );
229 /* ** */ /* ****** */
230 &delta1,
231 dB, &d2, 1, FLA_BOTTOM );
233 /* ** */ /* ******** */
234 &epsilon1,
235 eB, &e2, 1, FLA_BOTTOM );
237 /* ** */ /* **** */
238 &phi1,
239 fB, &f2, 1, FLA_BOTTOM );
241 /* ** */ /* **** */
242 &ghi1,
243 gB, &g2, 1, FLA_BOTTOM );
244
245 /*------------------------------------------------------------*/
246
247 // Save last element of a01 and set it to one so we can use a01 as
248 // v10t^T in subsequent computations. We will restore a01_b later on.
249 // Also note: V20^T is stored in A02.
250 if ( FLA_Obj_length( ATL ) > 0 )
251 {
253 &a01_b, 1, FLA_BOTTOM );
256 }
257
259 a21, &a2 );
260
261 // alpha11 = alpha11 - u10t * y10t' - z10t * v10t';
262 // a21 = a21 - U20 * y10t' - Z20 * v10t';
265
266 // a12t = a12t - u10t * Y20' - z10t * V20';
269
270 // Restore last element of a01.
271 if ( FLA_Obj_length( ATL ) > 0 )
272 {
274 }
275
276 // [ alpha11, u21p, tau11 ] = House2( alpha11, a21 );
278 alpha11,
279 a21, tau11 );
280 FLA_Copy( a21, u21p );
281
282 if ( FLA_Obj_width( A22 ) > 0 )
283 {
284 // minus_inv_tau11 = - 1 / tau11;
287
288 // a12p = ( tau11 - 1 ) * a12t^T / tau11;
289 // = a12t^T - ( 1 / tau11 ) * a12t^T;
292
293 // y21 = - Y20 * ( U20' * u21p ) - V20 * ( Z20' * u21p );
296
297 FLA_Set( FLA_ZERO, y21 );
300
301 // t01 = a10t' + U20' * u21;
303 FLA_Axpy( FLA_ONE, d0, t01 );
304
305 // y21 = y21 + A22' * u21p;
307
308 // a12p = a12p - conj(y21) / tau11;
310
311 // w21 = A22 * conj(a12p);
313
314 // w21 = w21 - U20 * ( Y20' * conj(a12p) ) - Z20 * ( V20' * conj(a12p) );
317
320
323 &Y20_b, 1, FLA_TOP );
325
326 // a22l = A22 * e0 - U20 * ( Y20' * e0 ) - Z20 * ( V20' * e0 );
327 FLA_Copy( A22_l, a22l );
330
331 // y21 = y21 + conj(a12t)^T;
333
336 &v21_b, 1, FLA_TOP );
338 &a12p_b, 1, FLA_TOP );
339
340 // [ alpha12, psi11_minus_alpha12, sigma11 ] = House2s( a12p_t, a12p_b );
342 a12p_t,
343 a12p_b,
345
346 // v21 = conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
347 FLA_Copy( a12p, v21 );
351
352 // minus_conj_alpha12 = - conj(alpha12);
355
356 // s01 = V20' * v21;
357 // = conj(V02) * v21;
358 // = conj(V02) * conj( ( a12p - alpha12 * e0 ) / ( psi11 - alpha12 ) );
359 // = conj(V02) * ( conj(a12p) - conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
360 // = ( conj(V02) * conj(a12p) - conj(V02) * conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
361 // = ( g0 - conj(V02) * conj(alpha12) * e0 ) / conj( psi11 - alpha12 );
362 FLA_Copy( g0, s01 );
365
366 // a12t_l = alpha12;
367 // a12t_r = v21_b^T;
370 }
371
372 // u21 = u21p;
373 FLA_Copy( u21p, u21 );
374
375 if ( FLA_Obj_width( A22 ) > 0 )
376 {
377 // beta = - y21' * v21 / tau11;
381
382 // z21 = ( w21 - conj(alpha12) * a22l ) / conj(psi11 - alpha12) + beta * u21;
383 FLA_Copy( w21, z21 );
386 FLA_Axpy( beta, u21, z21 );
387
388 // y21 = y21 / tau11;
389 // z21 = z21 / sigma11;
392 }
393 else // if ( FLA_Obj_width( A22 ) == 0 )
394 {
395 // t01 = a10t' + U20' * u21;
398 }
399
400 /*------------------------------------------------------------*/
401
402 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
403 a10t, alpha11, /**/ a12t,
404 /* ************** */ /* ************************ */
405 &ABL, /**/ &ABR, A20, a21, /**/ A22,
406 FLA_TL );
407 FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR, Y00, y01, /**/ Y02,
408 y10t, psi11, /**/ y12t,
409 /* ************** */ /* ********************** */
410 &YBL, /**/ &YBR, Y20, y21, /**/ Y22,
411 FLA_TL );
412 FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
413 z10t, zeta11, /**/ z12t,
414 /* ************** */ /* *********************** */
415 &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
416 FLA_TL );
417 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
418 t10t, tau11, /**/ t12t,
419 /* ************** */ /* ************************ */
420 &TBL, /**/ &TBR, T20, t21, /**/ T22,
421 FLA_TL );
422 FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
423 s10t, sigma11, /**/ s12t,
424 /* ************** */ /* ************************ */
425 &SBL, /**/ &SBR, S20, s21, /**/ S22,
426 FLA_TL );
428 omega11,
429 /* ** */ /* ***** */
430 &wB, w21, FLA_TOP );
432 alpha11l,
433 /* ** */ /* ***** */
434 &alB, a22l, FLA_TOP );
436 alpha11p,
437 /* ** */ /* ***** */
438 &apB, a12p, FLA_TOP );
440 upsilon11,
441 /* ** */ /* ***** */
442 &uB, u21, FLA_TOP );
444 upsilon11p,
445 /* ** */ /* ***** */
446 &uBp, u21p, FLA_TOP );
448 nu11,
449 /* ** */ /* ***** */
450 &vB, v21, FLA_TOP );
452 delta1,
453 /* ** */ /* ****** */
454 &dB, d2, FLA_TOP );
456 epsilon1,
457 /* ** */ /* ******** */
458 &eB, e2, FLA_TOP );
460 phi1,
461 /* ** */ /* **** */
462 &fB, f2, FLA_TOP );
464 ghi1,
465 /* ** */ /* **** */
466 &gB, g2, FLA_TOP );
467 }
468
471 FLA_Obj_free( &beta );
480 FLA_Obj_free( &w );
481 FLA_Obj_free( &al );
482 FLA_Obj_free( &ap );
483 FLA_Obj_free( &u );
484 FLA_Obj_free( &up );
485 FLA_Obj_free( &v );
486 FLA_Obj_free( &d );
487 FLA_Obj_free( &e );
488 FLA_Obj_free( &f );
489 FLA_Obj_free( &g );
490
491 return FLA_SUCCESS;
492}
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition FLA_View.c:541

References FLA_Axpy(), FLA_Axpyt(), FLA_Conjugate(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Househ2_UT(), FLA_Househ2s_UT(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Mult_add(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_unb_var4().

◆ FLA_Bidiag_UT_u_step_unb_var5()

FLA_Error FLA_Bidiag_UT_u_step_unb_var5 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  TU,
FLA_Obj  TV 
)
36{
37 FLA_Obj ATL, ATR, A00, a01, A02,
39 A20, a21, A22;
40 FLA_Obj YTL, YTR, Y00, y01, Y02,
41 YBL, YBR, y10t, psi11, y12t,
42 Y20, y21, Y22;
43 FLA_Obj ZTL, ZTR, Z00, z01, Z02,
45 Z20, z21, Z22;
46 FLA_Obj TTL, TTR, T00, t01, T02,
47 TBL, TBR, t10t, tau11, t12t,
48 T20, t21, T22;
49 FLA_Obj STL, STR, S00, s01, S02,
51 S20, s21, S22;
52 FLA_Obj uT, u01,
54 u21;
55 FLA_Obj vT, v01,
56 vB, nu11,
57 v21;
58 FLA_Obj dT, d0,
59 dB, delta1,
60 d2;
61 FLA_Obj eT, e0,
62 eB, epsilon1,
63 e2;
64 FLA_Obj fT, f0,
65 fB, phi1,
66 f2;
67 FLA_Obj gT, g0,
68 gB, ghi1,
69 g2;
70 FLA_Obj u, v;
71 FLA_Obj d, e, f, g;
72
77
79 a01_b;
82 v21_b;
83 FLA_Obj a2;
84
86 dim_t m_A, n_A;
88
89
91
93 m_A = FLA_Obj_length( A );
94 n_A = FLA_Obj_width( A );
95
96 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
97 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
99 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
100 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
101 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
102 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
103 FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
104 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
105 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
106
107 FLA_Set( FLA_ZERO, Y );
108 FLA_Set( FLA_ZERO, Z );
109
110 FLA_Part_2x2( A, &ATL, &ATR,
111 &ABL, &ABR, 0, 0, FLA_TL );
112 FLA_Part_2x2( Y, &YTL, &YTR,
113 &YBL, &YBR, 0, 0, FLA_TL );
114 FLA_Part_2x2( Z, &ZTL, &ZTR,
115 &ZBL, &ZBR, 0, 0, FLA_TL );
116 FLA_Part_2x2( T, &TTL, &TTR,
117 &TBL, &TBR, 0, 0, FLA_TL );
118 FLA_Part_2x2( S, &STL, &STR,
119 &SBL, &SBR, 0, 0, FLA_TL );
120 FLA_Part_2x1( u, &uT,
121 &uB, 0, FLA_TOP );
122 FLA_Part_2x1( v, &vT,
123 &vB, 0, FLA_TOP );
124 FLA_Part_2x1( d, &dT,
125 &dB, 0, FLA_TOP );
126 FLA_Part_2x1( e, &eT,
127 &eB, 0, FLA_TOP );
128 FLA_Part_2x1( f, &fT,
129 &fB, 0, FLA_TOP );
130 FLA_Part_2x1( g, &gT,
131 &gB, 0, FLA_TOP );
132
133 while ( FLA_Obj_length( ATL ) < b_alg )
134 {
135 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
136 /* ************* */ /* ************************** */
137 &a10t, /**/ &alpha11, &a12t,
138 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
139 1, 1, FLA_BR );
140 FLA_Repart_2x2_to_3x3( YTL, /**/ YTR, &Y00, /**/ &y01, &Y02,
141 /* ************* */ /* ************************ */
142 &y10t, /**/ &psi11, &y12t,
143 YBL, /**/ YBR, &Y20, /**/ &y21, &Y22,
144 1, 1, FLA_BR );
145 FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
146 /* ************* */ /* ************************* */
147 &z10t, /**/ &zeta11, &z12t,
148 ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
149 1, 1, FLA_BR );
150 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
151 /* ************* */ /* ************************** */
152 &t10t, /**/ &tau11, &t12t,
153 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
154 1, 1, FLA_BR );
155 FLA_Repart_2x2_to_3x3( STL, /**/ STR, &S00, /**/ &s01, &S02,
156 /* ************* */ /* ************************** */
157 &s10t, /**/ &sigma11, &s12t,
158 SBL, /**/ SBR, &S20, /**/ &s21, &S22,
159 1, 1, FLA_BR );
161 /* ** */ /* ***** */
162 &upsilon11,
163 uB, &u21, 1, FLA_BOTTOM );
165 /* ** */ /* ***** */
166 &nu11,
167 vB, &v21, 1, FLA_BOTTOM );
169 /* ** */ /* ****** */
170 &delta1,
171 dB, &d2, 1, FLA_BOTTOM );
173 /* ** */ /* ******** */
174 &epsilon1,
175 eB, &e2, 1, FLA_BOTTOM );
177 /* ** */ /* **** */
178 &phi1,
179 fB, &f2, 1, FLA_BOTTOM );
181 /* ** */ /* **** */
182 &ghi1,
183 gB, &g2, 1, FLA_BOTTOM );
184
185 /*------------------------------------------------------------*/
186
187 // Save last element of a01 and set it to one so we can use a01 as
188 // v10t^T in subsequent computations. We will restore a01_b later on.
189 // Also note: V20^T is stored in A02.
190 if ( FLA_Obj_length( ATL ) > 0 )
191 {
193 &a01_b, 1, FLA_BOTTOM );
196 }
197
199 a21, &a2 );
200
201 // alpha11 = alpha11 - u10t * y10t' - z10t * v10t';
202 // a21 = a21 - U20 * y10t' - Z20 * v10t';
205
206 // a12t = a12t - u10t * Y20' - z10t * V20';
209
210 // Restore last element of a01.
211 if ( FLA_Obj_length( ATL ) > 0 )
212 {
214 }
215
216 // [ alpha11, u21, tau11 ] = House2( alpha11, a21 );
218 alpha11,
219 a21, tau11 );
220 FLA_Copy( a21, u21 );
221
222 if ( FLA_Obj_width( A22 ) > 0 )
223 {
224 // y21' = a12t + u21' * A22;
225 // y21 = conj(a12t) + A22' * u21;
228
229 // y21 = y21 - Y20 * ( U20' * u21 ) - V20 * ( Z20' * u21 );
232
233 // t01 = a10t' + U20' * u21;
235 FLA_Axpy( FLA_ONE, d0, t01 );
236
239
240 // y21 = y21 / tau11;
242
243 // a12t = a12t - conj(y21)^T;
245
248 &v21_b, 1, FLA_TOP );
249
250 // [ a12t_l, v21_b, sigma11 ] = House2( a12t_l, a12t_r );
252
253 // v21_t = 1;
254 // v21_b = a12t_r^T;
257
258 // beta = - y21' * v21;
261
262 // z21 = A22 * v21 + beta * u21;
263 FLA_Copy( u21, z21 );
265
266 // z21 = z21 - U20 * ( Y20' * v21 ) - Z20 * ( V20' * v21 );
269
272
273 // z21 = z21 / sigma11;
275
276 // s01 = conj(V02) * v21;
277 FLA_Copy( g0, s01 );
278 }
279 else // if ( FLA_Obj_width( A22 ) == 0 )
280 {
281 // t01 = a10t' + U20' * u21;
284 }
285
286 /*------------------------------------------------------------*/
287
288 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
289 a10t, alpha11, /**/ a12t,
290 /* ************** */ /* ************************ */
291 &ABL, /**/ &ABR, A20, a21, /**/ A22,
292 FLA_TL );
293 FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR, Y00, y01, /**/ Y02,
294 y10t, psi11, /**/ y12t,
295 /* ************** */ /* ********************** */
296 &YBL, /**/ &YBR, Y20, y21, /**/ Y22,
297 FLA_TL );
298 FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
299 z10t, zeta11, /**/ z12t,
300 /* ************** */ /* *********************** */
301 &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
302 FLA_TL );
303 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
304 t10t, tau11, /**/ t12t,
305 /* ************** */ /* ************************ */
306 &TBL, /**/ &TBR, T20, t21, /**/ T22,
307 FLA_TL );
308 FLA_Cont_with_3x3_to_2x2( &STL, /**/ &STR, S00, s01, /**/ S02,
309 s10t, sigma11, /**/ s12t,
310 /* ************** */ /* ************************ */
311 &SBL, /**/ &SBR, S20, s21, /**/ S22,
312 FLA_TL );
314 upsilon11,
315 /* ** */ /* ***** */
316 &uB, u21, FLA_TOP );
318 nu11,
319 /* ** */ /* ***** */
320 &vB, v21, FLA_TOP );
322 delta1,
323 /* ** */ /* ****** */
324 &dB, d2, FLA_TOP );
326 epsilon1,
327 /* ** */ /* ******** */
328 &eB, e2, FLA_TOP );
330 phi1,
331 /* ** */ /* **** */
332 &fB, f2, FLA_TOP );
334 ghi1,
335 /* ** */ /* **** */
336 &gB, g2, FLA_TOP );
337 }
338
340 FLA_Obj_free( &beta );
343 FLA_Obj_free( &u );
344 FLA_Obj_free( &v );
345 FLA_Obj_free( &d );
346 FLA_Obj_free( &e );
347 FLA_Obj_free( &f );
348 FLA_Obj_free( &g );
349
350 return FLA_SUCCESS;
351}

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Househ2_UT(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_unb_var5().

◆ FLA_Bidiag_UT_u_unb_var1()

FLA_Error FLA_Bidiag_UT_u_unb_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_unb_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_unb_var1.c:18

References FLA_Bidiag_UT_u_step_unb_var1(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_unb_var2()

FLA_Error FLA_Bidiag_UT_u_unb_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_unb_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_unb_var2.c:18

References FLA_Bidiag_UT_u_step_unb_var2(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_unb_var3()

FLA_Error FLA_Bidiag_UT_u_unb_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_unb_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_unb_var3.c:18

References FLA_Bidiag_UT_u_step_unb_var3(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_unb_var4()

FLA_Error FLA_Bidiag_UT_u_unb_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
16 FLA_Obj Y, Z;
18 dim_t m_A, n_A;
19
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33}
FLA_Error FLA_Bidiag_UT_u_step_unb_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_unb_var4.c:35

References FLA_Bidiag_UT_u_step_unb_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_unb_var5()

FLA_Error FLA_Bidiag_UT_u_unb_var5 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
16 FLA_Obj Y, Z;
18 dim_t m_A, n_A;
19
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33}
FLA_Error FLA_Bidiag_UT_u_step_unb_var5(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_unb_var5.c:35

References FLA_Bidiag_UT_u_step_unb_var5(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Fused_Ahx_Axpy_Ax_opc_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *buff_tau,
scomplex *buff_beta,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_u,
int  inc_u,
scomplex *buff_a,
int  inc_a,
scomplex *buff_y,
int  inc_y,
scomplex *buff_w,
int  inc_w 
)
331{
338 int i;
339
340 bl1_csetv( m_A,
341 buff_0,
342 buff_w, inc_w );
343
345
346 for ( i = 0; i < n_A; ++i )
347 {
348 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
349 scomplex* psi1 = buff_y + (i )*inc_y;
350 scomplex* alpha1 = buff_a + (i )*inc_a;
351 scomplex* u = buff_u;
352 scomplex* w = buff_w;
353
354 /*------------------------------------------------------------*/
355
357 m_A,
358 buff_1,
359 a1, rs_A,
360 u, inc_u,
361 buff_beta,
362 psi1 );
363
366
368
370 m_A,
372 a1, rs_A,
373 w, inc_w );
374/*
375 F77_caxpy( &m_A,
376 &conj_alpha1,
377 a1, &rs_A,
378 w, &inc_w );
379*/
380
381 /*------------------------------------------------------------*/
382
383 }
384
385 return FLA_SUCCESS;
386}
double *restrict psi1
Definition bl1_axmyv2.c:141
double *restrict alpha1
Definition bl1_axpyv2bdotaxpy.c:198
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition bl1_dots.c:39

References alpha1, bl1_caxpyv(), bl1_cdots(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Ahx_Axpy_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_beta,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_u,
int  inc_u,
double *  buff_a,
int  inc_a,
double *  buff_y,
int  inc_y,
double *  buff_w,
int  inc_w 
)
216{
217 double zero = bl1_d0();
218 double minus_one = bl1_dm1();
219 double* restrict u = buff_u;
220 double* restrict w = buff_w;
221 double* restrict beta = buff_beta;
222 double* restrict a1;
223 double* restrict a2;
224 double* restrict psi1;
225 double* restrict psi2;
226 double* restrict alpha1;
227 double* restrict alpha2;
228
229 double minus_inv_tau;
230 int i;
231
232 int n_run = n_A / 2;
233 int n_left = n_A % 2;
234 int stepcs_A = 2*cs_A;
235 int stepinc_y = 2*inc_y;
236 int stepinc_a = 2*inc_a;
237
238
239 bl1_dsetv( m_A,
240 &zero,
241 buff_w, inc_w );
242
244
245 a1 = buff_A;
246 a2 = buff_A + cs_A;
247 psi1 = buff_y;
248 psi2 = buff_y + inc_y;
249 alpha1 = buff_a;
250 alpha2 = buff_a + inc_a;
251
252 for ( i = 0; i < n_run; ++i )
253 {
254/*
255 Effective computation:
256 y = beta * y + A' * u;
257 a = a - conj(y) / tau;
258 w = A * conj(a);
259*/
260 /*------------------------------------------------------------*/
261
263 m_A,
264 a1, rs_A,
265 a2, rs_A,
266 u, inc_u,
267 beta,
268 psi1,
269 psi2 );
270
273
275 alpha1,
276 alpha2,
277 a1, rs_A,
278 a2, rs_A,
279 w, inc_w );
280
281 /*------------------------------------------------------------*/
282
283 a1 += stepcs_A;
284 a2 += stepcs_A;
285 psi1 += stepinc_y;
286 psi2 += stepinc_y;
287 alpha1 += stepinc_a;
288 alpha2 += stepinc_a;
289 }
290
291 if ( n_left == 1 )
292 //for ( i = 0; i < n_left; ++i )
293 {
294 double rho1;
295
297 m_A,
298 a1, rs_A,
299 u, inc_u,
300 &rho1 );
302 bl1_dadd3( psi1, &rho1, psi1 );
303
305
307 m_A,
308 alpha1,
309 a1, rs_A,
310 w, inc_w );
311
312 //a1 += cs_A;
313 //psi1 += inc_y;
314 //alpha1 += inc_a;
315 }
316
317 return FLA_SUCCESS;
318}
int n_left
Definition bl1_axmyv2.c:149
int n_run
Definition bl1_axmyv2.c:148
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition bl1_axpyv2b.c:31
double rho1
Definition bl1_dotsv2.c:149
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition bl1_dotsv2.c:35
double *restrict alpha2
Definition bl1_dotv2axpyv2b.c:188
double bl1_dm1(void)
Definition bl1_constants.c:182
double bl1_d0(void)
Definition bl1_constants.c:118

References alpha1, alpha2, bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, and rho1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Ahx_Axpy_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_beta,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_u,
int  inc_u,
float *  buff_a,
int  inc_a,
float *  buff_y,
int  inc_y,
float *  buff_w,
int  inc_w 
)
152{
153 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
154 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
156 float minus_inv_tau;
157 int i;
158
159 bl1_ssetv( m_A,
160 buff_0,
161 buff_w, inc_w );
162
164
165 for ( i = 0; i < n_A; ++i )
166 {
167 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
168 float* psi1 = buff_y + (i )*inc_y;
169 float* alpha1 = buff_a + (i )*inc_a;
170 float* u = buff_u;
171 float* w = buff_w;
172
173 /*------------------------------------------------------------*/
174
176 m_A,
177 buff_1,
178 a1, rs_A,
179 u, inc_u,
180 buff_beta,
181 psi1 );
182
183 // bl1_dmult4( &minus_inv_tau, conj_psi1, alpha1, alpha1 );
185
187 m_A,
188 alpha1,
189 a1, rs_A,
190 w, inc_w );
191/*
192 F77_saxpy( &m_A,
193 alpha1,
194 a1, &rs_A,
195 w, &inc_w );
196*/
197
198 /*------------------------------------------------------------*/
199
200 }
201
202 return FLA_SUCCESS;
203}
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition bl1_dots.c:13

References alpha1, bl1_saxpyv(), bl1_sdots(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Ahx_Axpy_Ax_opt_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 ( FLA_Obj  A,
FLA_Obj  u,
FLA_Obj  tau,
FLA_Obj  a,
FLA_Obj  beta,
FLA_Obj  y,
FLA_Obj  w 
)
14{
15/*
16 Effective computation:
17 y = beta * y + A' * u;
18 a = a - conj(y) / tau;
19 w = A * conj(a);
20*/
21 FLA_Datatype datatype;
22 int m_A, n_A;
23 int rs_A, cs_A;
24 int inc_u, inc_a, inc_y, inc_w;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
30
33
35
37
39
41
42
43 switch ( datatype )
44 {
45 case FLA_FLOAT:
46 {
47 float* buff_A = FLA_FLOAT_PTR( A );
48 float* buff_u = FLA_FLOAT_PTR( u );
49 float* buff_a = FLA_FLOAT_PTR( a );
50 float* buff_y = FLA_FLOAT_PTR( y );
51 float* buff_w = FLA_FLOAT_PTR( w );
52 float* buff_tau = FLA_FLOAT_PTR( tau );
53 float* buff_beta = FLA_FLOAT_PTR( beta );
54
56 n_A,
63 buff_w, inc_w );
64
65 break;
66 }
67
68 case FLA_DOUBLE:
69 {
70 double* buff_A = FLA_DOUBLE_PTR( A );
71 double* buff_u = FLA_DOUBLE_PTR( u );
72 double* buff_a = FLA_DOUBLE_PTR( a );
73 double* buff_y = FLA_DOUBLE_PTR( y );
74 double* buff_w = FLA_DOUBLE_PTR( w );
75 double* buff_tau = FLA_DOUBLE_PTR( tau );
76 double* buff_beta = FLA_DOUBLE_PTR( beta );
77
79 n_A,
86 buff_w, inc_w );
87
88 break;
89 }
90
91 case FLA_COMPLEX:
92 {
100
102 n_A,
103 buff_tau,
104 buff_beta,
105 buff_A, rs_A, cs_A,
106 buff_u, inc_u,
107 buff_a, inc_a,
108 buff_y, inc_y,
109 buff_w, inc_w );
110
111 break;
112 }
113
115 {
123
125 n_A,
126 buff_tau,
127 buff_beta,
128 buff_A, rs_A, cs_A,
129 buff_u, inc_u,
130 buff_a, inc_a,
131 buff_y, inc_y,
132 buff_w, inc_w );
133
134 break;
135 }
136 }
137
138 return FLA_SUCCESS;
139}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145

References FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Ahx_Axpy_Ax_opz_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_beta,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_w,
int  inc_w 
)
399{
400 dcomplex zero = bl1_z0();
411
417 int i;
418 int n_run = n_A / 2;
419 int n_left = n_A % 2;
420 int twocs_A = 2*cs_A;
421 int twoinc_y = 2*inc_y;
422 int twoinc_a = 2*inc_a;
423
424
425 bl1_zsetv( m_A,
426 &zero,
427 buff_w, inc_w );
428
430
431 a1 = buff_A;
432 a2 = buff_A + cs_A;
433 psi1 = buff_y;
434 psi2 = buff_y + inc_y;
435 alpha1 = buff_a;
436 alpha2 = buff_a + inc_a;
437
438 for ( i = 0; i < n_run; ++i )
439 {
440/*
441 Effective computation:
442 y = beta * y + A' * u;
443 a = a - conj(y) / tau;
444 w = A * conj(a);
445*/
446 /*------------------------------------------------------------*/
447
449 m_A,
450 a1, rs_A,
451 a2, rs_A,
452 u, inc_u,
453 beta,
454 psi1,
455 psi2 );
456
463
467 a1, rs_A,
468 a2, rs_A,
469 w, inc_w );
470
471 /*------------------------------------------------------------*/
472
473 a1 += twocs_A;
474 a2 += twocs_A;
475 psi1 += twoinc_y;
476 psi2 += twoinc_y;
477 alpha1 += twoinc_a;
478 alpha2 += twoinc_a;
479 }
480
481 if ( n_left == 1 )
482 {
484
486 m_A,
487 a1, rs_A,
488 u, inc_u,
489 &rho1 );
491 bl1_zadd3( psi1, &rho1, psi1 );
492
496
498 m_A,
500 a1, rs_A,
501 w, inc_w );
502 }
503
504 return FLA_SUCCESS;
505}
int twoinc_y
Definition bl1_axpyv2b.c:154
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition bl1_axpyv2b.c:210
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition bl1_dotsv2.c:248
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
dcomplex bl1_zm1(void)
Definition bl1_constants.c:197

References alpha1, alpha2, bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zscals(), bl1_zsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_tau,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_up,
int  inc_up,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w 
)
436{
443 int i;
444
445 bl1_csetv( m_A,
446 buff_0,
447 buff_w, inc_w );
448
450
451 for ( i = 0; i < n_A; ++i )
452 {
453 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
454 scomplex* u = buff_u;
455 scomplex* psi1 = buff_y + (i )*inc_y;
456 scomplex* nu1 = buff_v + (i )*inc_v;
457 scomplex* z = buff_z;
459 scomplex* alpha1 = buff_a + (i )*inc_a;
460 scomplex* w = buff_w;
464
465 /*------------------------------------------------------------*/
466
469
472
474 m_A,
475 &temp1,
476 u, inc_u,
477 a1, rs_A );
478 //F77_caxpy( &m_A,
479 // &temp1,
480 // u, &inc_u,
481 // a1, &rs_A );
482
484 m_A,
485 &temp2,
486 z, inc_z,
487 a1, rs_A );
488 //F77_caxpy( &m_A,
489 // &temp2,
490 // z, &inc_z,
491 // a1, &rs_A );
492
494 m_A,
495 a1, rs_A,
496 up, inc_up,
497 psi1 );
498
501
503
505 m_A,
507 a1, rs_A,
508 w, inc_w );
509 //F77_caxpy( &m_A,
510 // &conj_alpha1,
511 // a1, &rs_A,
512 // w, &inc_w );
513
514 /*------------------------------------------------------------*/
515
516 }
517
518 return FLA_SUCCESS;
519}
double temp2
Definition bl1_axpyv2b.c:147
double temp1
Definition bl1_axpyv2b.c:146

References alpha1, bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ZERO, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_up,
int  inc_up,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w 
)
279{
280 double zero = bl1_d0();
281 double minus_one = bl1_dm1();
282 double* restrict u = buff_u;
283 double* restrict up = buff_up;
284 double* restrict w = buff_w;
285 double* restrict z = buff_z;
286 double* restrict alpha = buff_alpha;
287 double* restrict a1;
288 double* restrict a2;
289 double* restrict psi1;
290 double* restrict psi2;
291 double* restrict alpha1;
292 double* restrict alpha2;
293 double* restrict nu1;
294 double* restrict nu2;
295
296 double minus_inv_tau;
297 double alpha_conj_psi1;
298 double alpha_conj_psi2;
299 double alpha_conj_nu1;
300 double alpha_conj_nu2;
301 int i;
302 int n_run = n_A / 2;
303 int n_left = n_A % 2;
304 int twocs_A = 2*cs_A;
305 int twoinc_y = 2*inc_y;
306 int twoinc_a = 2*inc_a;
307 int twoinc_v = 2*inc_v;
308
309
310 bl1_dsetv( m_A,
311 &zero,
312 buff_w, inc_w );
313
315
316 a1 = buff_A;
317 a2 = buff_A + cs_A;
318 psi1 = buff_y;
319 psi2 = buff_y + inc_y;
320 alpha1 = buff_a;
321 alpha2 = buff_a + inc_a;
322 nu1 = buff_v;
323 nu2 = buff_v + inc_v;
324
325 for ( i = 0; i < n_run; ++i )
326 {
327
328 /*------------------------------------------------------------*/
329
332
335
336/*
337 Effective computation:
338 A = A + alpha * ( u * y' + z * v' );
339 y = A' * up;
340 a = a - conj(y) / tau;
341 w = A * conj(a);
342*/
346 u, inc_u,
347 z, inc_z,
348 a1, rs_A );
352 u, inc_u,
353 z, inc_z,
354 a2, rs_A );
355
356
358 m_A,
359 a1, rs_A,
360 a2, rs_A,
361 up, inc_up,
362 &zero,
363 psi1,
364 psi2 );
365
368
370 alpha1,
371 alpha2,
372 a1, rs_A,
373 a2, rs_A,
374 w, inc_w );
375
376 /*------------------------------------------------------------*/
377
378 a1 += twocs_A;
379 a2 += twocs_A;
380 psi1 += twoinc_y;
381 psi2 += twoinc_y;
382 alpha1 += twoinc_a;
383 alpha2 += twoinc_a;
384 nu1 += twoinc_v;
385 nu2 += twoinc_v;
386 }
387
388 if ( n_left == 1 )
389 {
390 double rho1;
391
394
398 u, inc_u,
399 z, inc_z,
400 a1, rs_A );
401
403 m_A,
404 a1, rs_A,
405 up, inc_up,
406 &rho1 );
407 bl1_dscals( &zero, psi1 );
408 bl1_dadd3( psi1, &rho1, psi1 );
409
411
413 m_A,
414 alpha1,
415 a1, rs_A,
416 w, inc_w );
417 }
418
419 return FLA_SUCCESS;
420}

References alpha1, alpha2, bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_up,
int  inc_up,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w 
)
182{
183 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
185 float minus_inv_tau;
186 int i;
187
188 bl1_ssetv( m_A,
189 buff_0,
190 buff_w, inc_w );
191
193
194 for ( i = 0; i < n_A; ++i )
195 {
196 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
197 float* u = buff_u;
198 float* psi1 = buff_y + (i )*inc_y;
199 float* nu1 = buff_v + (i )*inc_v;
200 float* z = buff_z;
201 float* up = buff_up;
202 float* alpha1 = buff_a + (i )*inc_a;
203 float* w = buff_w;
204 float* alpha = buff_alpha;
205 float temp1;
206 float temp2;
207
208 /*------------------------------------------------------------*/
209
210 // bl1_smult3( alpha, psi1, &temp1 );
211 temp1 = *alpha * *psi1;
212
213 // bl1_smult3( alpha, nu1, &temp2 );
214 temp2 = *alpha * *nu1;
215
217 m_A,
218 &temp1,
219 u, inc_u,
220 a1, rs_A );
221 //F77_saxpy( &m_A,
222 // &temp1,
223 // u, &inc_u,
224 // a1, &rs_A );
225
227 m_A,
228 &temp2,
229 z, inc_z,
230 a1, rs_A );
231 //F77_saxpy( &m_A,
232 // &temp2,
233 // z, &inc_z,
234 // a1, &rs_A );
235
237 m_A,
238 a1, rs_A,
239 up, inc_up,
240 psi1 );
241 //*psi1 = F77_sdot( &m_A,
242 // a1, &rs_A,
243 // up, &inc_up );
244
245 // bl1_smult4( &minus_inv_tau, psi1, alpha1, alpha1 );
247
249 m_A,
250 alpha1,
251 a1, rs_A,
252 w, inc_w );
253 //F77_saxpy( &m_A,
254 // alpha1,
255 // a1, &rs_A,
256 // w, &inc_w );
257
258 /*------------------------------------------------------------*/
259
260 }
261
262 return FLA_SUCCESS;
263}

References alpha1, bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ZERO, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  tau,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A,
FLA_Obj  up,
FLA_Obj  a,
FLA_Obj  w 
)
14{
15/*
16 Effective computation:
17 A = A + alpha * ( u * y' + z * v' );
18 y = A' * up;
19 a = a - conj(y) / tau;
20 w = A * conj(a);
21*/
22 FLA_Datatype datatype;
23 int m_A, n_A;
24 int rs_A, cs_A;
25 int inc_u, inc_y, inc_z, inc_v;
26 int inc_up, inc_a, inc_w;
27
28 datatype = FLA_Obj_datatype( A );
29
30 m_A = FLA_Obj_length( A );
31 n_A = FLA_Obj_width( A );
32
35
40
44
45
46 switch ( datatype )
47 {
48 case FLA_FLOAT:
49 {
50 float* buff_A = FLA_FLOAT_PTR( A );
51 float* buff_u = FLA_FLOAT_PTR( u );
52 float* buff_y = FLA_FLOAT_PTR( y );
53 float* buff_z = FLA_FLOAT_PTR( z );
54 float* buff_v = FLA_FLOAT_PTR( v );
55 float* buff_up = FLA_FLOAT_PTR( up );
56 float* buff_a = FLA_FLOAT_PTR( a );
57 float* buff_w = FLA_FLOAT_PTR( w );
58 float* buff_tau = FLA_FLOAT_PTR( tau );
59 float* buff_alpha = FLA_FLOAT_PTR( alpha );
60
62 n_A,
72 buff_w, inc_w );
73
74 break;
75 }
76
77 case FLA_DOUBLE:
78 {
79 double* buff_A = FLA_DOUBLE_PTR( A );
80 double* buff_u = FLA_DOUBLE_PTR( u );
81 double* buff_y = FLA_DOUBLE_PTR( y );
82 double* buff_z = FLA_DOUBLE_PTR( z );
83 double* buff_v = FLA_DOUBLE_PTR( v );
84 double* buff_up = FLA_DOUBLE_PTR( up );
85 double* buff_a = FLA_DOUBLE_PTR( a );
86 double* buff_w = FLA_DOUBLE_PTR( w );
87 double* buff_tau = FLA_DOUBLE_PTR( tau );
88 double* buff_alpha = FLA_DOUBLE_PTR( alpha );
89
91 n_A,
100 buff_a, inc_a,
101 buff_w, inc_w );
102
103 break;
104 }
105
106 case FLA_COMPLEX:
107 {
118
120 n_A,
121 buff_tau,
123 buff_u, inc_u,
124 buff_y, inc_y,
125 buff_z, inc_z,
126 buff_v, inc_v,
127 buff_A, rs_A, cs_A,
129 buff_a, inc_a,
130 buff_w, inc_w );
131
132 break;
133 }
134
136 {
147
149 n_A,
150 buff_tau,
152 buff_u, inc_u,
153 buff_y, inc_y,
154 buff_z, inc_z,
155 buff_v, inc_v,
156 buff_A, rs_A, cs_A,
158 buff_a, inc_a,
159 buff_w, inc_w );
160
161 break;
162 }
163 }
164
165 return FLA_SUCCESS;
166}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:170
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:523
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:424
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:267

References FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_up,
int  inc_up,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_w,
int  inc_w 
)
535{
536 dcomplex zero = bl1_z0();
551
563 int i;
564 int n_run = n_A / 2;
565 int n_left = n_A % 2;
566 int twocs_A = 2*cs_A;
567 int twoinc_y = 2*inc_y;
568 int twoinc_a = 2*inc_a;
569 int twoinc_v = 2*inc_v;
570
571
572 bl1_zsetv( m_A,
573 &zero,
574 buff_w, inc_w );
575
577
578 a1 = buff_A;
579 a2 = buff_A + cs_A;
580 psi1 = buff_y;
581 psi2 = buff_y + inc_y;
582 alpha1 = buff_a;
583 alpha2 = buff_a + inc_a;
584 nu1 = buff_v;
585 nu2 = buff_v + inc_v;
586
587 for ( i = 0; i < n_run; ++i )
588 {
589
590 /*------------------------------------------------------------*/
591
596
601
605 u, inc_u,
606 z, inc_z,
607 a1, rs_A );
611 u, inc_u,
612 z, inc_z,
613 a2, rs_A );
614
615
617 m_A,
618 a1, rs_A,
619 a2, rs_A,
620 up, inc_up,
621 &zero,
622 psi1,
623 psi2 );
624
631
635 a1, rs_A,
636 a2, rs_A,
637 w, inc_w );
638
639 /*------------------------------------------------------------*/
640
641 a1 += twocs_A;
642 a2 += twocs_A;
643 psi1 += twoinc_y;
644 psi2 += twoinc_y;
645 alpha1 += twoinc_a;
646 alpha2 += twoinc_a;
647 nu1 += twoinc_v;
648 nu2 += twoinc_v;
649 }
650
651 if ( n_left == 1 )
652 {
654
659
663 u, inc_u,
664 z, inc_z,
665 a1, rs_A );
666
668 m_A,
669 a1, rs_A,
670 up, inc_up,
671 &rho1 );
672 bl1_zscals( &zero, psi1 );
673 bl1_zadd3( psi1, &rho1, psi1 );
674
678
680 m_A,
682 a1, rs_A,
683 w, inc_w );
684 }
685
686 return FLA_SUCCESS;
687}

References alpha1, alpha2, bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zscals(), bl1_zsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_opc_var1()

FLA_Error FLA_Fused_Gerc2_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
249{
250 int i;
251
252 for ( i = 0; i < n_A; ++i )
253 {
254 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
255 scomplex* u = buff_u;
256 scomplex* psi1 = buff_y + (i )*inc_y;
257 scomplex* z = buff_z;
258 scomplex* nu1 = buff_v + (i )*inc_v;
264
265 /*------------------------------------------------------------*/
266
269
272
274 m_A,
275 &temp1,
276 u, inc_u,
277 a1, rs_A );
278/*
279 F77_caxpy( &m_A,
280 &temp1,
281 u, &inc_u,
282 a1, &rs_A );
283*/
284
286 m_A,
287 &temp2,
288 z, inc_z,
289 a1, rs_A );
290/*
291 F77_caxpy( &m_A,
292 &temp2,
293 z, &inc_z,
294 a1, &rs_A );
295*/
296
297 /*------------------------------------------------------------*/
298
299 }
300
301 return FLA_SUCCESS;
302}

References bl1_caxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofc_var2(), and FLA_Hess_UT_step_ofc_var3().

◆ FLA_Fused_Gerc2_opd_var1()

FLA_Error FLA_Fused_Gerc2_opd_var1 ( int  m_A,
int  n_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A 
)
201{
202 int i;
203
204 for ( i = 0; i < n_A; ++i )
205 {
206/*
207 Effective computation:
208 A = A + alpha * ( u * y' + z * v' );
209*/
210 double* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
211 double* restrict u = buff_u;
212 double* restrict psi1 = buff_y + (i )*inc_y;
213 double* restrict z = buff_z;
214 double* restrict nu1 = buff_v + (i )*inc_v;
215 double* restrict alpha = buff_alpha;
216 double alpha_conj_psi1;
217 double alpha_conj_nu1;
218
219 /*------------------------------------------------------------*/
220
222
224
228 u, inc_u,
229 z, inc_z,
230 a1, rs_A );
231
232 /*------------------------------------------------------------*/
233
234 }
235
236 return FLA_SUCCESS;
237}

References bl1_daxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofd_var2(), and FLA_Hess_UT_step_ofd_var3().

◆ FLA_Fused_Gerc2_ops_var1()

FLA_Error FLA_Fused_Gerc2_ops_var1 ( int  m_A,
int  n_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A 
)
138{
139 int i;
140
141 for ( i = 0; i < n_A; ++i )
142 {
143 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
144 float* u = buff_u;
145 float* psi1 = buff_y + (i )*inc_y;
146 float* z = buff_z;
147 float* nu1 = buff_v + (i )*inc_v;
148 float* alpha = buff_alpha;
149 float temp1;
150 float temp2;
151
152 /*------------------------------------------------------------*/
153
154 // bl1_smult3( alpha, psi1, &temp1 );
155 temp1 = *alpha * *psi1;
156
157 // bl1_smult3( alpha, nu1, &temp2 );
158 temp2 = *alpha * *nu1;
159
161 m_A,
162 &temp1,
163 u, inc_u,
164 a1, rs_A );
165/*
166 F77_saxpy( &m_A,
167 &temp1,
168 u, &inc_u,
169 a1, &rs_A );
170*/
171
173 m_A,
174 &temp2,
175 z, inc_z,
176 a1, rs_A );
177/*
178 F77_saxpy( &m_A,
179 &temp2,
180 z, &inc_z,
181 a1, &rs_A );
182*/
183
184 /*------------------------------------------------------------*/
185
186 }
187
188 return FLA_SUCCESS;
189}

References bl1_saxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofs_var2(), and FLA_Hess_UT_step_ofs_var3().

◆ FLA_Fused_Gerc2_opt_var1()

FLA_Error FLA_Fused_Gerc2_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A 
)
14{
15/*
16 Effective computation:
17 A = A + alpha * ( u * y' + z * v' );
18*/
19 FLA_Datatype datatype;
20 int m_A, n_A;
21 int rs_A, cs_A;
22 int inc_u, inc_y, inc_z, inc_v;
23
24 datatype = FLA_Obj_datatype( A );
25
26 m_A = FLA_Obj_length( A );
27 n_A = FLA_Obj_width( A );
28
31
36
37
38 switch ( datatype )
39 {
40 case FLA_FLOAT:
41 {
42 float* buff_A = FLA_FLOAT_PTR( A );
43 float* buff_u = FLA_FLOAT_PTR( u );
44 float* buff_y = FLA_FLOAT_PTR( y );
45 float* buff_z = FLA_FLOAT_PTR( z );
46 float* buff_v = FLA_FLOAT_PTR( v );
47 float* buff_alpha = FLA_FLOAT_PTR( alpha );
48
50 n_A,
56 buff_A, rs_A, cs_A );
57
58 break;
59 }
60
61 case FLA_DOUBLE:
62 {
63 double* buff_A = FLA_DOUBLE_PTR( A );
64 double* buff_u = FLA_DOUBLE_PTR( u );
65 double* buff_y = FLA_DOUBLE_PTR( y );
66 double* buff_z = FLA_DOUBLE_PTR( z );
67 double* buff_v = FLA_DOUBLE_PTR( v );
68 double* buff_alpha = FLA_DOUBLE_PTR( alpha );
69
71 n_A,
77 buff_A, rs_A, cs_A );
78
79 break;
80 }
81
82 case FLA_COMPLEX:
83 {
90
92 n_A,
98 buff_A, rs_A, cs_A );
99
100 break;
101 }
102
104 {
111
113 n_A,
115 buff_u, inc_u,
116 buff_y, inc_y,
117 buff_z, inc_z,
118 buff_v, inc_v,
119 buff_A, rs_A, cs_A );
120
121 break;
122 }
123 }
124
125 return FLA_SUCCESS;
126}
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:241

References FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Gerc2_opz_var1()

FLA_Error FLA_Fused_Gerc2_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
314{
315 int i;
316
317 for ( i = 0; i < n_A; ++i )
318 {
319 dcomplex* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
329
330 /*------------------------------------------------------------*/
331
334
337
341 u, inc_u,
342 z, inc_z,
343 a1, rs_A );
344
345 /*------------------------------------------------------------*/
346
347 }
348
349 return FLA_SUCCESS;
350}

References bl1_zaxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofz_var2(), and FLA_Hess_UT_step_ofz_var3().

◆ FLA_Fused_UYx_ZVx_opc_var1()

FLA_Error FLA_Fused_UYx_ZVx_opc_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
scomplex *  buff_delta,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_V,
int  rs_V,
int  cs_V,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_temp,
int  inc_temp,
scomplex *  buff_t,
int  inc_t,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w,
scomplex *  buff_al,
int  inc_al 
)
439{
440 int i;
441 int m_A = m_U;
442 int m_Z = m_U;
443
445 m_A,
446 buff_A, rs_A,
447 buff_al, inc_al );
448
449 for ( i = 0; i < n_U; ++i )
450 {
451 scomplex* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
452 scomplex* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
453 scomplex* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
454 scomplex* v1 = buff_V + (0 )*cs_V + (i )*rs_V;
455 scomplex* tau1 = buff_t + (i )*inc_t;
457 scomplex* a = buff_a;
458 scomplex* w = buff_w;
460 scomplex* psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
461 scomplex* nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
466
467 /*------------------------------------------------------------*/
468
470 n_V,
471 y1, rs_Y,
472 a, inc_a,
473 &alpha );
474
476 n_V,
477 v1, cs_V,
478 a, inc_a,
479 &beta );
480
481 bl1_cconjs( &alpha );
482 bl1_cconjs( &beta );
485
486 *tau1 = beta;
487
489 bl1_cscals( delta, &beta );
492
494 m_U,
495 &alpha,
496 u1, rs_U,
497 w, inc_w );
498 //F77_caxpy( &m_U,
499 // &alpha,
500 // u1, &rs_U,
501 // w, &inc_w );
502
504 m_Z,
505 &beta,
506 z1, rs_Z,
507 w, inc_w );
508 //F77_caxpy( &m_Z,
509 // &beta,
510 // z1, &rs_Z,
511 // w, &inc_w );
512
514 m_U,
515 &gamma,
516 u1, rs_U,
517 al, inc_al );
518 //F77_caxpy( &m_U,
519 // &gamma,
520 // u1, &rs_U,
521 // al, &inc_al );
522
524 m_Z,
525 &kappa,
526 z1, rs_Z,
527 al, inc_al);
528 //F77_caxpy( &m_Z,
529 // &kappa,
530 // z1, &rs_Z,
531 // al, &inc_al );
532
533 /*------------------------------------------------------------*/
534
535 }
536
537 return FLA_SUCCESS;
538}
double *restrict z1
Definition bl1_dotsv2.c:148
double *restrict y1
Definition bl1_dotsv2.c:147

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

◆ FLA_Fused_UYx_ZVx_opd_var1()

FLA_Error FLA_Fused_UYx_ZVx_opd_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
double * buff_delta,
double * buff_U,
int  rs_U,
int  cs_U,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_V,
int  rs_V,
int  cs_V,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_temp,
int  inc_temp,
double * buff_t,
int  inc_t,
double * buff_a,
int  inc_a,
double * buff_w,
int  inc_w,
double * buff_al,
int  inc_al 
)
346{
347 double zero = bl1_d0();
348 int i;
349 int m_A = m_U;
350 int m_Z = m_U;
351
353 m_A,
354 buff_A, rs_A,
355 buff_al, inc_al );
356
357 if ( m_U == 0 || n_U == 0 ) return 0;
358 if ( m_V == 0 || n_V == 0 ) return 0;
359
360 for ( i = 0; i < n_U; ++i )
361 {
362 double* restrict u1 = buff_U + (i )*cs_U + (0 )*rs_U;
363 double* restrict y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
364 double* restrict z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
365 double* restrict v1 = buff_V + (0 )*cs_V + (i )*rs_V;
366 double* restrict tau1 = buff_t + (i )*inc_t;
367 double* restrict t1 = buff_temp;
368 double* restrict a = buff_a;
369 double* restrict w = buff_w;
370 double* restrict al = buff_al;
371 double* restrict psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
372 double* restrict nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
373 double alpha;
374 double beta;
375 double gamma;
376 double kappa;
377
378 /*------------------------------------------------------------*/
379
381 n_V,
382 v1, cs_V,
383 t1, inc_t );
384
386 n_V,
387 y1, rs_Y,
388 t1, inc_t,
389 a, inc_a,
390 &zero,
391 &alpha,
392 &beta );
393
394 *tau1 = beta;
395
398
400 m_U,
401 &alpha,
402 &gamma,
403 u1, rs_U,
404 w, inc_w,
405 al, inc_al );
406
408 m_Z,
409 &beta,
410 &kappa,
411 z1, rs_U,
412 w, inc_w,
413 al, inc_al );
414
415 /*------------------------------------------------------------*/
416
417 }
418
419 return FLA_SUCCESS;
420}
void bl1_daxmyv2(conj1_t conjx, int n, double *alpha, double *beta, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z)
Definition bl1_axmyv2.c:34

References bl1_d0(), bl1_daxmyv2(), bl1_dcopyv(), bl1_ddotsv2(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

◆ FLA_Fused_UYx_ZVx_ops_var1()

FLA_Error FLA_Fused_UYx_ZVx_ops_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
float * buff_delta,
float * buff_U,
int  rs_U,
int  cs_U,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_V,
int  rs_V,
int  cs_V,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_temp,
int  inc_temp,
float * buff_t,
int  inc_t,
float * buff_a,
int  inc_a,
float * buff_w,
int  inc_w,
float * buff_al,
int  inc_al 
)
216{
217 int i;
218 int m_A = m_U;
219 int m_Z = m_U;
220
222 m_A,
223 buff_A, rs_A,
224 buff_al, inc_al );
225
226 for ( i = 0; i < n_U; ++i )
227 {
228 float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
229 float* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
230 float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
231 float* v1 = buff_V + (0 )*cs_V + (i )*rs_V;
232 float* tau1 = buff_t + (i )*inc_t;
233 float* delta = buff_delta;
234 float* a = buff_a;
235 float* w = buff_w;
236 float* al = buff_al;
237 float* psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
238 float* nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
239 float alpha;
240 float beta;
241 float gamma;
242 float kappa;
243
244 /*------------------------------------------------------------*/
245
247 n_V,
248 y1, rs_Y,
249 a, inc_a,
250 &alpha );
251 //alpha = F77_sdot( &n_V,
252 // y1, &rs_Y,
253 // a, &inc_a );
254
256 n_V,
257 v1, cs_V,
258 a, inc_a,
259 &beta );
260 //beta = F77_sdot( &n_V,
261 // v1, &cs_V,
262 // a, &inc_a );
263
264 *tau1 = beta;
265
266 // bl1_sconjs( &alpha );
267 // bl1_sconjs( &beta );
268 // bl1_scopyconj( psi20_l, &gamma );
269 // bl1_scopyconj( nu20_l, &kappa );
270 gamma = *psi20_l;
271 kappa = *nu20_l;
272
273 // bl1_dscals( delta, &alpha );
274 // bl1_dscals( delta, &beta );
275 // bl1_dscals( delta, &gamma );
276 // bl1_dscals( delta, &kappa );
277 alpha *= *delta;
278 beta *= *delta;
279 gamma *= *delta;
280 kappa *= *delta;
281
283 m_U,
284 &alpha,
285 u1, rs_U,
286 w, inc_w );
287 //F77_saxpy( &m_U,
288 // &alpha,
289 // u1, &rs_U,
290 // w, &inc_w );
291
293 m_Z,
294 &beta,
295 z1, rs_Z,
296 w, inc_w );
297 //F77_saxpy( &m_Z,
298 // &beta,
299 // z1, &rs_Z,
300 // w, &inc_w );
301
303 m_U,
304 &gamma,
305 u1, rs_U,
306 al, inc_al );
307 //F77_saxpy( &m_U,
308 // &gamma,
309 // u1, &rs_U,
310 // al, &inc_al );
311
313 m_Z,
314 &kappa,
315 z1, rs_Z,
316 al, inc_al );
317 //F77_saxpy( &m_Z,
318 // &kappa,
319 // z1, &rs_Z,
320 // al, &inc_al );
321
322 /*------------------------------------------------------------*/
323
324 }
325
326 return FLA_SUCCESS;
327}

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_UYx_ZVx_opt_var1().

◆ FLA_Fused_UYx_ZVx_opt_var1()

FLA_Error FLA_Fused_UYx_ZVx_opt_var1 ( FLA_Obj  delta,
FLA_Obj  a,
FLA_Obj  U,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  V,
FLA_Obj  A,
FLA_Obj  temp,
FLA_Obj  t,
FLA_Obj  w,
FLA_Obj  al 
)
14{
15/*
16 Effective computation:
17 w = w + delta * ( U ( Y' conj(a) ) + Z ( V' conj(a) ) );
18 al = A * e0 + delta * ( U ( Y' e0 ) + Z ( V' e0 ) );
19 t = V' conj(a);
20*/
21 FLA_Datatype datatype;
22 int m_U, n_U;
23 int m_V, n_V;
24 int rs_A, cs_A;
25 int rs_U, cs_U;
26 int rs_Y, cs_Y;
27 int rs_Z, cs_Z;
28 int rs_V, cs_V;
30
31 datatype = FLA_Obj_datatype( A );
32
33 m_U = FLA_Obj_length( U );
34 n_U = FLA_Obj_width( U );
35
36 m_V = FLA_Obj_length( V );
37 n_V = FLA_Obj_width( V );
38
41
44
47
50
53
59
60
61 switch ( datatype )
62 {
63 case FLA_FLOAT:
64 {
65 float* buff_A = FLA_FLOAT_PTR( A );
66 float* buff_U = FLA_FLOAT_PTR( U );
67 float* buff_Y = FLA_FLOAT_PTR( Y );
68 float* buff_Z = FLA_FLOAT_PTR( Z );
69 float* buff_V = FLA_FLOAT_PTR( V );
70 float* buff_temp = FLA_FLOAT_PTR( temp );
71 float* buff_t = FLA_FLOAT_PTR( t );
72 float* buff_a = FLA_FLOAT_PTR( a );
73 float* buff_w = FLA_FLOAT_PTR( w );
74 float* buff_al = FLA_FLOAT_PTR( al );
75 float* buff_delta = FLA_FLOAT_PTR( delta );
76
78 n_U,
79 m_V,
80 n_V,
91 buff_al, inc_al );
92
93 break;
94 }
95
96 case FLA_DOUBLE:
97 {
98 double* buff_A = FLA_DOUBLE_PTR( A );
99 double* buff_U = FLA_DOUBLE_PTR( U );
100 double* buff_Y = FLA_DOUBLE_PTR( Y );
101 double* buff_Z = FLA_DOUBLE_PTR( Z );
102 double* buff_V = FLA_DOUBLE_PTR( V );
103 double* buff_temp = FLA_DOUBLE_PTR( temp );
104 double* buff_t = FLA_DOUBLE_PTR( t );
105 double* buff_a = FLA_DOUBLE_PTR( a );
106 double* buff_w = FLA_DOUBLE_PTR( w );
107 double* buff_al = FLA_DOUBLE_PTR( al );
108 double* buff_delta = FLA_DOUBLE_PTR( delta );
109
111 n_U,
112 m_V,
113 n_V,
115 buff_U, rs_U, cs_U,
116 buff_Y, rs_Y, cs_Y,
117 buff_Z, rs_Z, cs_Z,
118 buff_V, rs_V, cs_V,
119 buff_A, rs_A, cs_A,
121 buff_t, inc_t,
122 buff_a, inc_a,
123 buff_w, inc_w,
124 buff_al, inc_al );
125
126 break;
127 }
128
129 case FLA_COMPLEX:
130 {
142
144 n_U,
145 m_V,
146 n_V,
148 buff_U, rs_U, cs_U,
149 buff_Y, rs_Y, cs_Y,
150 buff_Z, rs_Z, cs_Z,
151 buff_V, rs_V, cs_V,
152 buff_A, rs_A, cs_A,
154 buff_t, inc_t,
155 buff_a, inc_a,
156 buff_w, inc_w,
157 buff_al, inc_al );
158
159 break;
160 }
161
163 {
175
177 n_U,
178 m_V,
179 n_V,
181 buff_U, rs_U, cs_U,
182 buff_Y, rs_Y, cs_Y,
183 buff_Z, rs_Z, cs_Z,
184 buff_V, rs_V, cs_V,
185 buff_A, rs_A, cs_A,
187 buff_t, inc_t,
188 buff_a, inc_a,
189 buff_w, inc_w,
190 buff_al, inc_al );
191
192 break;
193 }
194 }
195
196 return FLA_SUCCESS;
197}
FLA_Error FLA_Fused_UYx_ZVx_ops_var1(int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:201
FLA_Error FLA_Fused_UYx_ZVx_opd_var1(int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:331
FLA_Error FLA_Fused_UYx_ZVx_opz_var1(int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:542
FLA_Error FLA_Fused_UYx_ZVx_opc_var1(int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:424
dcomplex temp
Definition bl1_axpyv2b.c:301

References FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), i, and temp.

◆ FLA_Fused_UYx_ZVx_opz_var1()

FLA_Error FLA_Fused_UYx_ZVx_opz_var1 ( int  m_U,
int  n_U,
int  m_V,
int  n_V,
dcomplex * buff_delta,
dcomplex * buff_U,
int  rs_U,
int  cs_U,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_V,
int  rs_V,
int  cs_V,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_temp,
int  inc_temp,
dcomplex * buff_t,
int  inc_t,
dcomplex * buff_a,
int  inc_a,
dcomplex * buff_w,
int  inc_w,
dcomplex * buff_al,
int  inc_al 
)
557{
558 dcomplex zero = bl1_z0();
559 int i;
560 int m_A = m_U;
561 int m_Z = m_U;
562
564 m_A,
565 buff_A, rs_A,
566 buff_al, inc_al );
567
568 if ( m_U == 0 || n_U == 0 ) return 0;
569 if ( m_V == 0 || n_V == 0 ) return 0;
570
571 for ( i = 0; i < n_U; ++i )
572 {
573 dcomplex* restrict u1 = buff_U + (i )*cs_U + (0 )*rs_U;
574 dcomplex* restrict y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
575 dcomplex* restrict z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
576 dcomplex* restrict v1 = buff_V + (0 )*cs_V + (i )*rs_V;
581 dcomplex* restrict psi20_l = buff_Y + (i )*cs_Y + (0 )*rs_Y;
582 dcomplex* restrict nu20_l = buff_V + (0 )*cs_V + (i )*rs_V;
587
588 /*------------------------------------------------------------*/
589
591 n_V,
592 y1, rs_Y,
593 v1, cs_V,
594 a, inc_a,
595 &zero,
596 &alpha,
597 &beta );
598
599 bl1_zconjs( &alpha );
600 bl1_zconjs( &beta );
601
602 *tau1 = beta;
603
606
608 m_U,
609 &alpha,
610 &gamma,
611 u1, rs_U,
612 w, inc_w,
613 al, inc_al );
614
616 m_Z,
617 &beta,
618 &kappa,
619 z1, rs_U,
620 w, inc_w,
621 al, inc_al );
622
623 /*------------------------------------------------------------*/
624
625 }
626
627 return FLA_SUCCESS;
628}
void bl1_zaxmyv2(conj1_t conjx, int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z)
Definition bl1_axmyv2.c:250

References bl1_z0(), bl1_zaxmyv2(), bl1_zcopyv(), bl1_zdotsv2(), BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_UYx_ZVx_opt_var1().