libflame revision_anchor
Functions
FLA_Bidiag_UT_u_fus_var3.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_ofu_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_ofu_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
16}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var3.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Bidiag_UT_u_step_ofu_var3(), and i.

◆ FLA_Bidiag_UT_u_step_ofc_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
927{
931
941 int i;
942
943 // b_alg = FLA_Obj_length( T );
944 int b_alg = m_TS;
945
946 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
947 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
948 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
949 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
950 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
951 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
952 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
953 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
954 scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
955 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
956 scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
957 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
958 scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
959 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
960 int inc_w = 1;
961 int inc_ap = 1;
962 int inc_u = 1;
963 int inc_up = 1;
964 int inc_v = 1;
965 int inc_y = 1;
966 int inc_z = 1;
967
968 for ( i = 0; i < b_alg; ++i )
969 {
970 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
971 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
972 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
973 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
974 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
975 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
976 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
977
978 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
979 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
980
981 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
982 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
983
984 scomplex* w21 = buff_w + (i+1)*inc_w;
985
986 scomplex* a12p = buff_ap + (i+1)*inc_ap;
987
989 scomplex* u21 = buff_u + (i+1)*inc_u;
990
991 scomplex* u21p = buff_up + (i+1)*inc_up;
992
993 scomplex* nu11 = buff_v + (i )*inc_v;
994 scomplex* v21 = buff_v + (i+1)*inc_v;
995
996 scomplex* psi11 = buff_y + (i )*inc_y;
997 scomplex* y21 = buff_y + (i+1)*inc_y;
998
999 scomplex* zeta11 = buff_z + (i )*inc_z;
1000 scomplex* z21 = buff_z + (i+1)*inc_z;
1001
1002 scomplex* a12p_t = a12p + (0 )*inc_ap;
1003 scomplex* a12p_b = a12p + (1 )*inc_ap;
1004
1005 scomplex* v21_t = v21 + (0 )*inc_v;
1006 scomplex* v21_b = v21 + (1 )*inc_v;
1007
1008 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1009 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1010
1011 scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1012
1013 int m_ahead = m_A - i - 1;
1014 int n_ahead = n_A - i - 1;
1015 int m_behind = i;
1016 int n_behind = i;
1017
1018 /*------------------------------------------------------------*/
1019
1020 if ( m_behind > 0 )
1021 {
1022 // FLA_Copy( upsilon11, minus_upsilon11 );
1023 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1025
1026 // FLA_Copy( zeta11, minus_zeta11 );
1027 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1029
1030 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1031 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1034
1035 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1036 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1039
1040 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1041 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1043 1,
1045 upsilon11, 1,
1046 alpha11, 1 );
1048 1,
1050 zeta11, 1,
1051 alpha11, 1 );
1052
1053 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1054 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1056 m_ahead,
1058 u21, inc_u,
1059 a21, rs_A );
1061 m_ahead,
1063 z21, inc_z,
1064 a21, rs_A );
1065
1066 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1067 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1069 n_ahead,
1071 y21, inc_y,
1072 a12t, cs_A );
1074 n_ahead,
1075 &minus_zeta11,
1076 v21, inc_v,
1077 a12t, cs_A );
1078 }
1079
1080 // FLA_Househ2_UT( FLA_LEFT,
1081 // alpha11,
1082 // a21, tau11 );
1083 // FLA_Copy( a21, u21p );
1085 alpha11,
1086 a21, rs_A,
1087 tau11 );
1089 m_ahead,
1090 a21, rs_A,
1091 u21p, inc_up );
1092
1093 if ( n_ahead > 0 )
1094 {
1095 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1096 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1098
1099 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1100 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1102 n_ahead,
1103 a12t, cs_A,
1104 a12p, inc_ap );
1106 n_ahead,
1108 a12t, cs_A,
1109 a12p, inc_ap );
1110 }
1111
1112 if ( m_behind > 0 && n_ahead > 0 )
1113 {
1114 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1115 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1116 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1117 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1118 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1120 n_ahead,
1121 tau11,
1122 buff_m1,
1123 u21, inc_u,
1124 y21, inc_y,
1125 z21, inc_z,
1126 v21, inc_v,
1127 A22, rs_A, cs_A,
1128 u21p, inc_up,
1129 a12p, inc_ap,
1130 w21, inc_w );
1131
1132
1133 }
1134 else if ( n_ahead > 0 )
1135 {
1136 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1137 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1138 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1140 n_ahead,
1141 tau11,
1142 buff_0,
1143 A22, rs_A, cs_A,
1144 u21p, inc_up,
1145 a12p, inc_ap,
1146 y21, inc_y,
1147 w21, inc_w );
1148 }
1149
1150 if ( n_ahead > 0 )
1151 {
1152 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1154 n_ahead,
1155 buff_1,
1156 a12t, cs_A,
1157 y21, inc_y );
1158
1159 // FLA_Househ2s_UT( FLA_RIGHT,
1160 // a12p_t,
1161 // a12p_b,
1162 // alpha12, psi11_minus_alpha12, sigma11 );
1164 a12p_t,
1165 a12p_b, inc_ap,
1166 &alpha12,
1168 sigma11 );
1169
1170 // FLA_Copy( a12p, v21 );
1171 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1172 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1173 // FLA_Conjugate( v21_b );
1175 n_ahead,
1176 a12p, inc_ap,
1177 v21, inc_v );
1180 n_ahead,
1182 v21, inc_v );
1183 bl1_cconjv( n_ahead - 1,
1184 v21_b, inc_v );
1185
1186 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1187 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1188 *a12t_l = alpha12;
1190 n_ahead - 1,
1191 v21_b, inc_v,
1192 a12t_r, cs_A );
1193 }
1194
1195 // FLA_Copy( u21p, u21 );
1197 m_ahead,
1198 u21p, inc_up,
1199 u21, inc_u );
1200
1201 if ( n_ahead > 0 )
1202 {
1203 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1204 // FLA_Scal( FLA_MINUS_ONE, beta );
1205 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1207 n_ahead,
1208 y21, inc_y,
1209 v21, inc_v,
1210 &beta );
1212
1213 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1214 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1217
1218 // FLA_Copy( w21, z21 );
1219 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1220 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1221 // FLA_Axpy( beta, u21, z21 );
1223 m_ahead,
1224 w21, inc_w,
1225 z21, inc_z );
1227 m_ahead,
1229 A22_l, rs_A,
1230 z21, inc_z );
1232 m_ahead,
1234 z21, inc_z );
1236 m_ahead,
1237 &beta,
1238 u21, inc_u,
1239 z21, inc_z );
1240
1241 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1242 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1244 n_ahead,
1245 tau11,
1246 y21, inc_y );
1248 m_ahead,
1249 sigma11,
1250 z21, inc_z );
1251
1252 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1255 m_behind,
1256 n_ahead,
1257 buff_1,
1258 A02, rs_A, cs_A,
1259 v21, inc_v,
1260 buff_0,
1261 s01, rs_S );
1262 }
1263
1264 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1265 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1267 n_behind,
1268 a10t, cs_A,
1269 t01, rs_T );
1272 m_ahead,
1273 n_behind,
1274 buff_1,
1275 A20, rs_A, cs_A,
1276 u21, inc_u,
1277 buff_1,
1278 t01, rs_T );
1279
1280 if ( m_behind + 1 == b_alg && n_ahead > 0 )
1281 {
1282 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1283 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1286 m_ahead,
1287 n_ahead,
1288 buff_m1,
1289 u21, inc_u,
1290 y21, inc_y,
1291 A22, rs_A, cs_A );
1294 m_ahead,
1295 n_ahead,
1296 buff_m1,
1297 z21, inc_z,
1298 v21, inc_v,
1299 A22, rs_A, cs_A );
1300 }
1301
1302 /*------------------------------------------------------------*/
1303
1304 }
1305
1306 // FLA_Obj_free( &w );
1307 // FLA_Obj_free( &ap );
1308 // FLA_Obj_free( &u );
1309 // FLA_Obj_free( &up );
1310 // FLA_Obj_free( &v );
1311 // FLA_Obj_free( &y );
1312 // FLA_Obj_free( &z );
1313 FLA_free( buff_w );
1314 FLA_free( buff_ap );
1315 FLA_free( buff_u );
1316 FLA_free( buff_up );
1317 FLA_free( buff_v );
1318 FLA_free( buff_y );
1319 FLA_free( buff_z );
1320
1321 return FLA_SUCCESS;
1322}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:424
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition FLA_Househ2s_UT.c:589
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cconjv(int m, scomplex *x, int incx)
Definition bl1_conjv.c:23
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofd_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
522{
523 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
524 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
526
527 double alpha12;
528 double minus_conj_alpha12;
529 double psi11_minus_alpha12;
530 double minus_inv_tau11;
531 double minus_upsilon11;
532 double minus_conj_nu11;
533 double minus_conj_psi11;
534 double minus_zeta11;
535 double beta;
536 int i;
537
538 // b_alg = FLA_Obj_length( T );
539 int b_alg = m_TS;
540
541 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
542 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
543 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
544 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
545 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
546 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
547 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
548 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
549 double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
550 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
551 double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
552 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
553 double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
554 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
555 int inc_w = 1;
556 int inc_ap = 1;
557 int inc_u = 1;
558 int inc_up = 1;
559 int inc_v = 1;
560 int inc_y = 1;
561 int inc_z = 1;
562
563 for ( i = 0; i < b_alg; ++i )
564 {
565 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
566 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
567 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
568 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
569 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
570 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
571 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
572
573 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
574 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
575
576 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
577 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
578
579 double* w21 = buff_w + (i+1)*inc_w;
580
581 double* a12p = buff_ap + (i+1)*inc_ap;
582
583 double* upsilon11 = buff_u + (i )*inc_u;
584 double* u21 = buff_u + (i+1)*inc_u;
585
586 double* u21p = buff_up + (i+1)*inc_up;
587
588 double* nu11 = buff_v + (i )*inc_v;
589 double* v21 = buff_v + (i+1)*inc_v;
590
591 double* psi11 = buff_y + (i )*inc_y;
592 double* y21 = buff_y + (i+1)*inc_y;
593
594 double* zeta11 = buff_z + (i )*inc_z;
595 double* z21 = buff_z + (i+1)*inc_z;
596
597 double* a12p_t = a12p + (0 )*inc_ap;
598 double* a12p_b = a12p + (1 )*inc_ap;
599
600 double* v21_t = v21 + (0 )*inc_v;
601 double* v21_b = v21 + (1 )*inc_v;
602
603 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
604 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
605
606 double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
607
608 int m_ahead = m_A - i - 1;
609 int n_ahead = n_A - i - 1;
610 int m_behind = i;
611 int n_behind = i;
612
613 /*------------------------------------------------------------*/
614
615 if ( m_behind > 0 )
616 {
617 // FLA_Copy( upsilon11, minus_upsilon11 );
618 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
620
621 // FLA_Copy( zeta11, minus_zeta11 );
622 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
624
625 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
626 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
629
630 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
631 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
634
635 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
636 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
638 1,
640 upsilon11, 1,
641 alpha11, 1 );
643 1,
645 zeta11, 1,
646 alpha11, 1 );
647
648 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
649 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
651 m_ahead,
653 u21, inc_u,
654 a21, rs_A );
656 m_ahead,
658 z21, inc_z,
659 a21, rs_A );
660
661 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
662 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
664 n_ahead,
666 y21, inc_y,
667 a12t, cs_A );
669 n_ahead,
671 v21, inc_v,
672 a12t, cs_A );
673 }
674
675 // FLA_Househ2_UT( FLA_LEFT,
676 // alpha11,
677 // a21, tau11 );
678 // FLA_Copy( a21, u21p );
680 alpha11,
681 a21, rs_A,
682 tau11 );
684 m_ahead,
685 a21, rs_A,
686 u21p, inc_up );
687
688 if ( n_ahead > 0 )
689 {
690 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
691 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
693
694 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
695 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
697 n_ahead,
698 a12t, cs_A,
699 a12p, inc_ap );
701 n_ahead,
703 a12t, cs_A,
704 a12p, inc_ap );
705 }
706
707 if ( m_behind > 0 && n_ahead > 0 )
708 {
709 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
710 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
711 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
712 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
713 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
715 n_ahead,
716 tau11,
717 buff_m1,
718 u21, inc_u,
719 y21, inc_y,
720 z21, inc_z,
721 v21, inc_v,
722 A22, rs_A, cs_A,
723 u21p, inc_up,
724 a12p, inc_ap,
725 w21, inc_w );
726
727
728 }
729 else if ( n_ahead > 0 )
730 {
731 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
732 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
733 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
735 n_ahead,
736 tau11,
737 buff_0,
738 A22, rs_A, cs_A,
739 u21p, inc_up,
740 a12p, inc_ap,
741 y21, inc_y,
742 w21, inc_w );
743 }
744
745 if ( n_ahead > 0 )
746 {
747 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
749 n_ahead,
750 buff_1,
751 a12t, cs_A,
752 y21, inc_y );
753
754 // FLA_Househ2s_UT( FLA_RIGHT,
755 // a12p_t,
756 // a12p_b,
757 // alpha12, psi11_minus_alpha12, sigma11 );
759 a12p_t,
760 a12p_b, inc_ap,
761 &alpha12,
763 sigma11 );
764
765 // FLA_Copy( a12p, v21 );
766 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
767 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
768 // FLA_Conjugate( v21_b );
770 n_ahead,
771 a12p, inc_ap,
772 v21, inc_v );
775 n_ahead,
777 v21, inc_v );
778 bl1_dconjv( n_ahead - 1,
779 v21_b, inc_v );
780
781 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
782 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
783 *a12t_l = alpha12;
785 n_ahead - 1,
786 v21_b, inc_v,
787 a12t_r, cs_A );
788 }
789
790 // FLA_Copy( u21p, u21 );
792 m_ahead,
793 u21p, inc_up,
794 u21, inc_u );
795
796 if ( n_ahead > 0 )
797 {
798 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
799 // FLA_Scal( FLA_MINUS_ONE, beta );
800 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
802 n_ahead,
803 y21, inc_y,
804 v21, inc_v,
805 &beta );
807
808 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
809 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
812
813 // FLA_Copy( w21, z21 );
814 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
815 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
816 // FLA_Axpy( beta, u21, z21 );
818 m_ahead,
819 w21, inc_w,
820 z21, inc_z );
822 m_ahead,
824 A22_l, rs_A,
825 z21, inc_z );
827 m_ahead,
829 z21, inc_z );
831 m_ahead,
832 &beta,
833 u21, inc_u,
834 z21, inc_z );
835
836 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
837 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
839 n_ahead,
840 tau11,
841 y21, inc_y );
843 m_ahead,
844 sigma11,
845 z21, inc_z );
846
847 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
850 m_behind,
851 n_ahead,
852 buff_1,
853 A02, rs_A, cs_A,
854 v21, inc_v,
855 buff_0,
856 s01, rs_S );
857 }
858
859 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
860 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
862 n_behind,
863 a10t, cs_A,
864 t01, rs_T );
867 m_ahead,
868 n_behind,
869 buff_1,
870 A20, rs_A, cs_A,
871 u21, inc_u,
872 buff_1,
873 t01, rs_T );
874
875 if ( m_behind + 1 == b_alg && n_ahead > 0 )
876 {
877 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
878 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
881 m_ahead,
882 n_ahead,
883 buff_m1,
884 u21, inc_u,
885 y21, inc_y,
886 A22, rs_A, cs_A );
889 m_ahead,
890 n_ahead,
891 buff_m1,
892 z21, inc_z,
893 v21, inc_v,
894 A22, rs_A, cs_A );
895 }
896
897 /*------------------------------------------------------------*/
898
899 }
900
901 // FLA_Obj_free( &w );
902 // FLA_Obj_free( &ap );
903 // FLA_Obj_free( &u );
904 // FLA_Obj_free( &up );
905 // FLA_Obj_free( &v );
906 // FLA_Obj_free( &y );
907 // FLA_Obj_free( &z );
908 FLA_free( buff_w );
909 FLA_free( buff_ap );
910 FLA_free( buff_u );
911 FLA_free( buff_up );
912 FLA_free( buff_v );
913 FLA_free( buff_y );
914 FLA_free( buff_z );
915
916 return FLA_SUCCESS;
917}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:267
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition FLA_Househ2s_UT.c:572
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dconjv(int m, double *x, int incx)
Definition bl1_conjv.c:18
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofs_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T,
float * buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
124
125 float alpha12;
126 float minus_conj_alpha12;
128 float minus_inv_tau11;
129 float minus_upsilon11;
130 float minus_conj_nu11;
131 float minus_conj_psi11;
132 float minus_zeta11;
133 float beta;
134 int i;
135
136 // b_alg = FLA_Obj_length( T );
137 int b_alg = m_TS;
138
139 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
140 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
141 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
142 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
143 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
144 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
145 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
146 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
147 float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
148 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149 float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
150 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
151 float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
152 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
153 int inc_w = 1;
154 int inc_ap = 1;
155 int inc_u = 1;
156 int inc_up = 1;
157 int inc_v = 1;
158 int inc_y = 1;
159 int inc_z = 1;
160
161 for ( i = 0; i < b_alg; ++i )
162 {
163 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170
171 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
172 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
173
174 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
175 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
176
177 float* w21 = buff_w + (i+1)*inc_w;
178
179 float* a12p = buff_ap + (i+1)*inc_ap;
180
181 float* upsilon11 = buff_u + (i )*inc_u;
182 float* u21 = buff_u + (i+1)*inc_u;
183
184 float* u21p = buff_up + (i+1)*inc_up;
185
186 float* nu11 = buff_v + (i )*inc_v;
187 float* v21 = buff_v + (i+1)*inc_v;
188
189 float* psi11 = buff_y + (i )*inc_y;
190 float* y21 = buff_y + (i+1)*inc_y;
191
192 float* zeta11 = buff_z + (i )*inc_z;
193 float* z21 = buff_z + (i+1)*inc_z;
194
195 float* a12p_t = a12p + (0 )*inc_ap;
196 float* a12p_b = a12p + (1 )*inc_ap;
197
198 float* v21_t = v21 + (0 )*inc_v;
199 float* v21_b = v21 + (1 )*inc_v;
200
201 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
202 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
203
204 float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
205
206 int m_ahead = m_A - i - 1;
207 int n_ahead = n_A - i - 1;
208 int m_behind = i;
209 int n_behind = i;
210
211 /*------------------------------------------------------------*/
212
213 if ( m_behind > 0 )
214 {
215 // FLA_Copy( upsilon11, minus_upsilon11 );
216 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
218
219 // FLA_Copy( zeta11, minus_zeta11 );
220 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
222
223 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
224 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
227
228 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
229 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
232
233 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_upsilon11, psi11, alpha11 );
234 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_zeta11, nu11, alpha11 );
236 1,
238 psi11, 1,
239 alpha11, 1 );
241 1,
243 nu11, 1,
244 alpha11, 1 );
245
246 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
247 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
249 m_ahead,
251 u21, inc_u,
252 a21, rs_A );
254 m_ahead,
256 z21, inc_z,
257 a21, rs_A );
258
259 // FLA_Axpyt( FLA_TRANSPOSE, minus_upsilon11, y21, a12t );
260 // FLA_Axpyt( FLA_TRANSPOSE, minus_zeta11, v21, a12t );
262 n_ahead,
264 y21, inc_y,
265 a12t, cs_A );
267 n_ahead,
269 v21, inc_v,
270 a12t, cs_A );
271 }
272
273 // FLA_Househ2_UT( FLA_LEFT,
274 // alpha11,
275 // a21, tau11 );
276 // FLA_Copy( a21, u21p );
278 alpha11,
279 a21, rs_A,
280 tau11 );
282 m_ahead,
283 a21, rs_A,
284 u21p, inc_up );
285
286 if ( n_ahead > 0 )
287 {
288 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
289 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
291
292 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
293 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
295 n_ahead,
296 a12t, cs_A,
297 a12p, inc_ap );
299 n_ahead,
301 a12t, cs_A,
302 a12p, inc_ap );
303 }
304
305 if ( m_behind > 0 && n_ahead > 0 )
306 {
307 // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
308 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
309 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
310 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
311 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
313 n_ahead,
314 tau11,
315 buff_m1,
316 u21, inc_u,
317 y21, inc_y,
318 z21, inc_z,
319 v21, inc_v,
320 A22, rs_A, cs_A,
321 u21p, inc_up,
322 a12p, inc_ap,
323 w21, inc_w );
324
325
326 }
327 else if ( n_ahead > 0 )
328 {
329 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
330 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
331 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
333 n_ahead,
334 tau11,
335 buff_0,
336 A22, rs_A, cs_A,
337 u21p, inc_up,
338 a12p, inc_ap,
339 y21, inc_y,
340 w21, inc_w );
341 }
342
343 if ( n_ahead > 0 )
344 {
345 // FLA_Axpyt( FLA_TRANSPOSE, FLA_ONE, a12t, y21 );
347 n_ahead,
348 buff_1,
349 a12t, cs_A,
350 y21, inc_y );
351
352 // FLA_Househ2s_UT( FLA_RIGHT,
353 // a12p_t,
354 // a12p_b,
355 // alpha12, psi11_minus_alpha12, sigma11 );
357 a12p_t,
358 a12p_b, inc_ap,
359 &alpha12,
361 sigma11 );
362
363 // FLA_Copy( a12p, v21 );
364 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
365 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
367 n_ahead,
368 a12p, inc_ap,
369 v21, inc_v );
372 n_ahead,
374 v21, inc_v );
375
376 // FLA_Copy( alpha12, a12t_l );
377 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
378 *a12t_l = alpha12;
380 n_ahead - 1,
381 v21_b, inc_v,
382 a12t_r, cs_A );
383 }
384
385 // FLA_Copy( u21p, u21 );
387 m_ahead,
388 u21p, inc_up,
389 u21, inc_u );
390
391 if ( n_ahead > 0 )
392 {
393 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
394 // FLA_Scal( FLA_MINUS_ONE, beta );
395 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
397 n_ahead,
398 y21, inc_y,
399 v21, inc_v,
400 &beta );
402
403 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
404 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
407
408 // FLA_Copy( w21, z21 );
409 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
410 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
411 // FLA_Axpy( beta, u21, z21 );
413 m_ahead,
414 w21, inc_w,
415 z21, inc_z );
417 m_ahead,
419 A22_l, rs_A,
420 z21, inc_z );
422 m_ahead,
424 z21, inc_z );
426 m_ahead,
427 &beta,
428 u21, inc_u,
429 z21, inc_z );
430
431 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
432 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
434 n_ahead,
435 tau11,
436 y21, inc_y );
438 m_ahead,
439 sigma11,
440 z21, inc_z );
441
442 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
445 m_behind,
446 n_ahead,
447 buff_1,
448 A02, rs_A, cs_A,
449 v21, inc_v,
450 buff_0,
451 s01, rs_S );
452 }
453
454 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
455 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
457 n_behind,
458 a10t, cs_A,
459 t01, rs_T );
462 m_ahead,
463 n_behind,
464 buff_1,
465 A20, rs_A, cs_A,
466 u21, inc_u,
467 buff_1,
468 t01, rs_T );
469
470 if ( m_behind + 1 == b_alg && n_ahead > 0 )
471 {
472 // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
473 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
476 m_ahead,
477 n_ahead,
478 buff_m1,
479 u21, inc_u,
480 y21, inc_y,
481 A22, rs_A, cs_A );
484 m_ahead,
485 n_ahead,
486 buff_m1,
487 z21, inc_z,
488 v21, inc_v,
489 A22, rs_A, cs_A );
490 }
491
492 /*------------------------------------------------------------*/
493
494 }
495
496 // FLA_Obj_free( &w );
497 // FLA_Obj_free( &ap );
498 // FLA_Obj_free( &u );
499 // FLA_Obj_free( &up );
500 // FLA_Obj_free( &v );
501 // FLA_Obj_free( &y );
502 // FLA_Obj_free( &z );
503 FLA_free( buff_w );
504 FLA_free( buff_ap );
505 FLA_free( buff_u );
506 FLA_free( buff_up );
507 FLA_free( buff_v );
508 FLA_free( buff_y );
509 FLA_free( buff_z );
510
511 return FLA_SUCCESS;
512}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:170
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition FLA_Househ2s_UT.c:555
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
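30 m_TS = FLA_Obj_length( T );
31
32 rs_A = FLA_Obj_row_stride( A );
33 cs_A = FLA_Obj_col_stride( A );
34
35 rs_T = FLA_Obj_row_stride( T );
36 cs_T = FLA_Obj_col_stride( T );
37
38 rs_S = FLA_Obj_row_stride( S );
39 cs_S = FLA_Obj_col_stride( S );
40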
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
50 FLA_Bidiag_UT_u_step_ofs_var3( m_A,
51 n_A,
52 m_TS,
53 buff_A, rs_A, cs_A,
54 buff_T, rs_T, cs_T,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
66 FLA_Bidiag_UT_u_step_ofd_var3( m_A,
67 n_A,
68 m_TS,
69 buff_A, rs_A, cs_A,
70 buff_T, rs_T, cs_T,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
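78 scomplex* buff_A = FLA_COMPLEX_PTR( A );
79 scomplex* buff_T = FLA_COMPLEX_PTR( T );
80 scomplex* buff_S = FLA_COMPLEX_PTR( S );
81
82 FLA_Bidiag_UT_u_step_ofc_var3( m_A,
83 n_A,
84 m_TS,
85 buff_A, rs_A, cs_A,
86 buff_T, rs_T, cs_T,
87 buff_S, rs_S, cs_S );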
88
89 break;
90 }
91
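92 case FLA_DOUBLE_COMPLEX:
93 {
94 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96 dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97
98 FLA_Bidiag_UT_u_step_ofz_var3( m_A,
99 n_A,
100 m_TS,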
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:516
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:1326
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:114
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var3.c:921
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blf_var3(), and FLA_Bidiag_UT_u_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofz_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
1332{
1336
1345 dcomplex beta;
1346 int i;
1347
1348 // b_alg = FLA_Obj_length( T );
1349 int b_alg = m_TS;
1350
1351 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1352 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1353 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1354 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1355 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1356 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
1357 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1358 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1359 dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1360 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1361 dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1362 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1363 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1364 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1365 int inc_w = 1;
1366 int inc_ap = 1;
1367 int inc_u = 1;
1368 int inc_up = 1;
1369 int inc_v = 1;
1370 int inc_y = 1;
1371 int inc_z = 1;
1372
1373 for ( i = 0; i < b_alg; ++i )
1374 {
1375 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1376 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1377 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1378 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1379 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1380 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1381 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1382
1383 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1384 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1385
1386 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1387 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1388
1389 dcomplex* w21 = buff_w + (i+1)*inc_w;
1390
1391 dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1392
1394 dcomplex* u21 = buff_u + (i+1)*inc_u;
1395
1396 dcomplex* u21p = buff_up + (i+1)*inc_up;
1397
1398 dcomplex* nu11 = buff_v + (i )*inc_v;
1399 dcomplex* v21 = buff_v + (i+1)*inc_v;
1400
1401 dcomplex* psi11 = buff_y + (i )*inc_y;
1402 dcomplex* y21 = buff_y + (i+1)*inc_y;
1403
1404 dcomplex* zeta11 = buff_z + (i )*inc_z;
1405 dcomplex* z21 = buff_z + (i+1)*inc_z;
1406
1407 dcomplex* a12p_t = a12p + (0 )*inc_ap;
1408 dcomplex* a12p_b = a12p + (1 )*inc_ap;
1409
1410 dcomplex* v21_t = v21 + (0 )*inc_v;
1411 dcomplex* v21_b = v21 + (1 )*inc_v;
1412
1413 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1414 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1415
1416 dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1417
1418 int m_ahead = m_A - i - 1;
1419 int n_ahead = n_A - i - 1;
1420 int m_behind = i;
1421 int n_behind = i;
1422
1423 /*------------------------------------------------------------*/
1424
1425 if ( m_behind > 0 )
1426 {
1427 // FLA_Copy( upsilon11, minus_upsilon11 );
1428 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1430
1431 // FLA_Copy( zeta11, minus_zeta11 );
1432 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1434
1435 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1436 // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1439
1440 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1441 // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1444
1445 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1446 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1448 1,
1450 upsilon11, 1,
1451 alpha11, 1 );
1453 1,
1455 zeta11, 1,
1456 alpha11, 1 );
1457
1458 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1459 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1461 m_ahead,
1463 u21, inc_u,
1464 a21, rs_A );
1466 m_ahead,
1468 z21, inc_z,
1469 a21, rs_A );
1470
1471 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1472 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1474 n_ahead,
1476 y21, inc_y,
1477 a12t, cs_A );
1479 n_ahead,
1480 &minus_zeta11,
1481 v21, inc_v,
1482 a12t, cs_A );
1483 }
1484
1485 // FLA_Househ2_UT( FLA_LEFT,
1486 // alpha11,
1487 // a21, tau11 );
1488 // FLA_Copy( a21, u21p );
1490 alpha11,
1491 a21, rs_A,
1492 tau11 );
1494 m_ahead,
1495 a21, rs_A,
1496 u21p, inc_up );
1497
1498 if ( n_ahead > 0 )
1499 {
1500 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1501 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1503
1504 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1505 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1507 n_ahead,
1508 a12t, cs_A,
1509 a12p, inc_ap );
1511 n_ahead,
1513 a12t, cs_A,
1514 a12p, inc_ap );
1515 }
1516
1517 if ( m_behind > 0 && n_ahead > 0 )
1518 {
1519 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1520 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1521 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1522 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1523 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1525 n_ahead,
1526 tau11,
1527 buff_m1,
1528 u21, inc_u,
1529 y21, inc_y,
1530 z21, inc_z,
1531 v21, inc_v,
1532 A22, rs_A, cs_A,
1533 u21p, inc_up,
1534 a12p, inc_ap,
1535 w21, inc_w );
1536
1537
1538 }
1539 else if ( n_ahead > 0 )
1540 {
1541 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1542 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1543 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1545 n_ahead,
1546 tau11,
1547 buff_0,
1548 A22, rs_A, cs_A,
1549 u21p, inc_up,
1550 a12p, inc_ap,
1551 y21, inc_y,
1552 w21, inc_w );
1553 }
1554
1555 if ( n_ahead > 0 )
1556 {
1557 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1559 n_ahead,
1560 buff_1,
1561 a12t, cs_A,
1562 y21, inc_y );
1563
1564 // FLA_Househ2s_UT( FLA_RIGHT,
1565 // a12p_t,
1566 // a12p_b,
1567 // alpha12, psi11_minus_alpha12, sigma11 );
1569 a12p_t,
1570 a12p_b, inc_ap,
1571 &alpha12,
1573 sigma11 );
1574
1575 // FLA_Copy( a12p, v21 );
1576 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1577 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1578 // FLA_Conjugate( v21_b );
1580 n_ahead,
1581 a12p, inc_ap,
1582 v21, inc_v );
1585 n_ahead,
1587 v21, inc_v );
1588 bl1_zconjv( n_ahead - 1,
1589 v21_b, inc_v );
1590
1591 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1592 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1593 *a12t_l = alpha12;
1595 n_ahead - 1,
1596 v21_b, inc_v,
1597 a12t_r, cs_A );
1598 }
1599
1600 // FLA_Copy( u21p, u21 );
1602 m_ahead,
1603 u21p, inc_up,
1604 u21, inc_u );
1605
1606 if ( n_ahead > 0 )
1607 {
1608 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1609 // FLA_Scal( FLA_MINUS_ONE, beta );
1610 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1612 n_ahead,
1613 y21, inc_y,
1614 v21, inc_v,
1615 &beta );
1617
1618 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1619 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1622
1623 // FLA_Copy( w21, z21 );
1624 // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1625 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1626 // FLA_Axpy( beta, u21, z21 );
1628 m_ahead,
1629 w21, inc_w,
1630 z21, inc_z );
1632 m_ahead,
1634 A22_l, rs_A,
1635 z21, inc_z );
1637 m_ahead,
1639 z21, inc_z );
1641 m_ahead,
1642 &beta,
1643 u21, inc_u,
1644 z21, inc_z );
1645
1646 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1647 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1649 n_ahead,
1650 tau11,
1651 y21, inc_y );
1653 m_ahead,
1654 sigma11,
1655 z21, inc_z );
1656
1657 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1660 m_behind,
1661 n_ahead,
1662 buff_1,
1663 A02, rs_A, cs_A,
1664 v21, inc_v,
1665 buff_0,
1666 s01, rs_S );
1667 }
1668
1669 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1670 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1672 n_behind,
1673 a10t, cs_A,
1674 t01, rs_T );
1677 m_ahead,
1678 n_behind,
1679 buff_1,
1680 A20, rs_A, cs_A,
1681 u21, inc_u,
1682 buff_1,
1683 t01, rs_T );
1684
1685 if ( m_behind + 1 == b_alg && n_ahead > 0 )
1686 {
1687 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1688 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1691 m_ahead,
1692 n_ahead,
1693 buff_m1,
1694 u21, inc_u,
1695 y21, inc_y,
1696 A22, rs_A, cs_A );
1699 m_ahead,
1700 n_ahead,
1701 buff_m1,
1702 z21, inc_z,
1703 v21, inc_v,
1704 A22, rs_A, cs_A );
1705 }
1706
1707 /*------------------------------------------------------------*/
1708
1709 }
1710
1711 // FLA_Obj_free( &w );
1712 // FLA_Obj_free( &ap );
1713 // FLA_Obj_free( &u );
1714 // FLA_Obj_free( &up );
1715 // FLA_Obj_free( &v );
1716 // FLA_Obj_free( &y );
1717 // FLA_Obj_free( &z );
1718 FLA_free( buff_w );
1719 FLA_free( buff_ap );
1720 FLA_free( buff_u );
1721 FLA_free( buff_up );
1722 FLA_free( buff_v );
1723 FLA_free( buff_y );
1724 FLA_free( buff_z );
1725
1726 return FLA_SUCCESS;
1727}
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:523
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition FLA_Househ2s_UT.c:610
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition bl1_conjv.c:34
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), bl1_zscals(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().