libflame revision_anchor
Functions
FLA_Bidiag_UT_u_fus_var4.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_ofu_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_ofu_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
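14{
15 FLA_Error r_val;
16 FLA_Obj Y, Z;
17 FLA_Datatype datatype_A;
18 dim_t m_A, n_A;
19
20 datatype_A = FLA_Obj_datatype( A );
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
27 r_val = FLA_Bidiag_UT_u_step_ofu_var4( A, Y, Z, TU, TV );
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33}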
FLA_Error FLA_Bidiag_UT_u_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var4.c:35
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition FLA_Obj.c:55
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Error
Definition FLA_type_defs.h:47
int FLA_Datatype
Definition FLA_type_defs.h:49
unsigned long dim_t
Definition FLA_type_defs.h:71
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Bidiag_UT_u_step_ofu_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), and i.

◆ FLA_Bidiag_UT_u_step_ofc_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
1103{
1107
1112 scomplex beta;
1114 int i;
1115
1116 // b_alg = FLA_Obj_length( T );
1117 int b_alg = m_TS;
1118
1119 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1121 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1122 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1123 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1124 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1125 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1126 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1127 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1128 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1129 scomplex* buff_tmp = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1130 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1131 scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1132 scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1133 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1134 scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1135 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1136 scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1137 scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1138 int inc_tmp = 1;
1139 int inc_w = 1;
1140 int inc_al = 1;
1141 int inc_ap = 1;
1142 int inc_u = 1;
1143 int inc_up = 1;
1144 int inc_v = 1;
1145 int inc_d = 1;
1146 int inc_e = 1;
1147
1148 // FLA_Set( FLA_ZERO, Y );
1149 // FLA_Set( FLA_ZERO, Z );
1150 bl1_csetm( n_A,
1151 b_alg,
1152 buff_0,
1153 buff_Y, rs_Y, cs_Y );
1154 bl1_csetm( m_A,
1155 b_alg,
1156 buff_0,
1157 buff_Z, rs_Z, cs_Z );
1158
1159 for ( i = 0; i < b_alg; ++i )
1160 {
1161 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1162 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1163 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1164 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1165 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1166 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1167 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1168 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1169
1170 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1171 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1172 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1173
1174 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1175 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1176 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1177
1178 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1179 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1180
1181 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1182 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1183
1184 scomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1185
1186 scomplex* w21 = buff_w + (i+1)*inc_w;
1187
1188 scomplex* a22l = buff_al + (i+1)*inc_al;
1189
1190 scomplex* a12p = buff_ap + (i+1)*inc_ap;
1191
1192 scomplex* u21 = buff_u + (i+1)*inc_u;
1193
1194 scomplex* u21p = buff_up + (i+1)*inc_up;
1195
1196 scomplex* v21 = buff_v + (i+1)*inc_v;
1197
1198 scomplex* d0 = buff_d + (0 )*inc_d;
1199
1200 scomplex* e0 = buff_e + (0 )*inc_e;
1201
1202 scomplex* a12p_t = a12p + (0 )*inc_ap;
1203 scomplex* a12p_b = a12p + (1 )*inc_ap;
1204
1205 scomplex* v21_t = v21 + (0 )*inc_v;
1206 scomplex* v21_b = v21 + (1 )*inc_v;
1207
1208 scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1209
1210 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1211 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1212
1213 scomplex* ABL = a10t;
1214 scomplex* ZBL = z10t;
1215
1216 scomplex* a2 = alpha11;
1217
1218 int m_ahead = m_A - i - 1;
1219 int n_ahead = n_A - i - 1;
1220 int m_behind = i;
1221 int n_behind = i;
1222
1223 /*------------------------------------------------------------*/
1224
1225 if ( m_behind > 0 )
1226 {
1227 // FLA_Copy( a01_b, last_elem );
1228 // FLA_Set( FLA_ONE, a01_b );
1229 last_elem = *a01_b;
1230 *a01_b = *buff_1;
1231 }
1232
1233 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1234 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1237 m_ahead + 1,
1238 n_behind,
1239 buff_m1,
1240 ABL, rs_A, cs_A,
1241 y10t, cs_Y,
1242 buff_1,
1243 a2, rs_A );
1246 m_ahead + 1,
1247 n_behind,
1248 buff_m1,
1249 ZBL, rs_Z, cs_Z,
1250 a01, rs_A,
1251 buff_1,
1252 a2, rs_A );
1253
1254 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1255 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1258 n_ahead,
1259 n_behind,
1260 buff_m1,
1261 Y20, rs_Y, cs_Y,
1262 a10t, cs_A,
1263 buff_1,
1264 a12t, cs_A );
1267 m_behind,
1268 n_ahead,
1269 buff_m1,
1270 A02, rs_A, cs_A,
1271 z10t, cs_Z,
1272 buff_1,
1273 a12t, cs_A );
1274
1275 if ( m_behind > 0 )
1276 {
1277 // FLA_Copy( last_elem, a01_b );
1278 *a01_b = last_elem;
1279 }
1280
1281 // FLA_Househ2_UT( FLA_LEFT,
1282 // alpha11,
1283 // a21, tau11 );
1284 // FLA_Copy( a21, u21p );
1286 alpha11,
1287 a21, rs_A,
1288 tau11 );
1290 m_ahead,
1291 a21, rs_A,
1292 u21p, inc_up );
1293
1294 if ( n_ahead > 0 )
1295 {
1296 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1297 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1299
1300 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1301 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1303 n_ahead,
1304 a12t, cs_A,
1305 a12p, inc_ap );
1307 n_ahead,
1309 a12t, cs_A,
1310 a12p, inc_ap );
1311
1312 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1313 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1316 m_ahead,
1317 n_behind,
1318 buff_1,
1319 A20, rs_A, cs_A,
1320 u21p, inc_up,
1321 buff_0,
1322 d0, inc_d );
1325 m_ahead,
1326 n_behind,
1327 buff_1,
1328 Z20, rs_Z, cs_Z,
1329 u21p, inc_up,
1330 buff_0,
1331 e0, inc_e );
1332
1333 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1334 // FLA_Axpy( FLA_ONE, d0, t01 );
1336 n_behind,
1337 a10t, cs_A,
1338 t01, rs_T );
1340 n_behind,
1341 buff_1,
1342 d0, inc_d,
1343 t01, rs_T );
1344
1345 // FLA_Set( FLA_ZERO, y21 );
1346 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1347 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1349 buff_0,
1350 y21, rs_Y );
1353 n_ahead,
1354 n_behind,
1355 buff_m1,
1356 Y20, rs_Y, cs_Y,
1357 d0, inc_d,
1358 buff_1,
1359 y21, rs_Y );
1362 m_behind,
1363 n_ahead,
1364 buff_m1,
1365 A02, rs_A, cs_A,
1366 e0, inc_e,
1367 buff_1,
1368 y21, rs_Y );
1369
1370 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1371 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1372 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1374 n_ahead,
1375 tau11,
1376 buff_1,
1377 A22, rs_A, cs_A,
1378 u21p, inc_up,
1379 a12p, inc_ap,
1380 y21, rs_Y,
1381 w21, inc_w );
1382
1383 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1384 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1385 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1386 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1387 // FLA_Copy( A22_l, a22l );
1388 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1389 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1390 // FLA_Copy( g0, s01 );
1392 n_behind,
1393 m_behind,
1394 n_ahead,
1395 buff_m1,
1396 A20, rs_A, cs_A,
1397 Y20, rs_Y, cs_Y,
1398 Z20, rs_Z, cs_Z,
1399 A02, rs_A, cs_A,
1400 A22, rs_A, cs_A,
1401 tmp21, inc_tmp,
1402 s01, rs_S,
1403 a12p, inc_ap,
1404 w21, inc_w,
1405 a22l, inc_al );
1406
1407 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1409 n_ahead,
1410 buff_1,
1411 a12t, cs_A,
1412 y21, rs_Y );
1413
1414 // FLA_Househ2s_UT( FLA_RIGHT,
1415 // a12p_t,
1416 // a12p_b,
1417 // alpha12, psi11_minus_alpha12, sigma11 );
1419 a12p_t,
1420 a12p_b, inc_ap,
1421 &alpha12,
1423 sigma11 );
1424
1425 // FLA_Copy( a12p, v21 );
1426 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1427 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1428 // FLA_Conjugate( v21_b );
1430 n_ahead,
1431 a12p, inc_ap,
1432 v21, inc_v );
1435 n_ahead,
1437 v21, inc_v );
1438 bl1_cconjv( n_ahead - 1,
1439 v21_b, inc_v );
1440
1441 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1442 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1445
1446 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1447 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1449 n_behind,
1451 A02, rs_A,
1452 s01, rs_S );
1454 n_behind,
1456 s01, rs_S );
1457
1458 // FLA_Copy( alpha12, a12t_l );
1459 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1460 *a12t_l = alpha12;
1462 n_ahead - 1,
1463 v21_b, inc_v,
1464 a12t_r, cs_A );
1465 }
1466
1467 // FLA_Copy( u21p, u21 );
1469 m_ahead,
1470 u21p, inc_up,
1471 u21, inc_u );
1472
1473 if ( n_ahead > 0 )
1474 {
1475 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1476 // FLA_Scal( FLA_MINUS_ONE, beta );
1477 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1479 n_ahead,
1480 y21, rs_Y,
1481 v21, inc_v,
1482 &beta );
1484
1485 // FLA_Copy( w21, z21 );
1486 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1487 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1488 // FLA_Axpy( beta, u21, z21 );
1490 m_ahead,
1491 w21, inc_w,
1492 z21, rs_Z );
1494 m_ahead,
1496 a22l, inc_al,
1497 z21, rs_Z );
1499 m_ahead,
1501 z21, rs_Z );
1503 m_ahead,
1504 &beta,
1505 u21, inc_u,
1506 z21, rs_Z );
1507
1508 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1509 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1511 n_ahead,
1512 tau11,
1513 y21, rs_Y );
1515 m_ahead,
1516 sigma11,
1517 z21, rs_Z );
1518 }
1519 else // if ( n_ahead == 0 )
1520 {
1521 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1522 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1524 n_behind,
1525 a10t, cs_A,
1526 t01, rs_T );
1529 m_ahead,
1530 n_behind,
1531 buff_1,
1532 A20, rs_A, cs_A,
1533 u21, inc_u,
1534 buff_1,
1535 t01, rs_T );
1536 }
1537
1538 /*------------------------------------------------------------*/
1539
1540 }
1541
1542 // FLA_Obj_free( &w );
1543 // FLA_Obj_free( &al );
1544 // FLA_Obj_free( &ap );
1545 // FLA_Obj_free( &u );
1546 // FLA_Obj_free( &up );
1547 // FLA_Obj_free( &v );
1548 // FLA_Obj_free( &d );
1549 // FLA_Obj_free( &e );
1550 FLA_free( buff_tmp );
1551 FLA_free( buff_w );
1552 FLA_free( buff_al );
1553 FLA_free( buff_ap );
1554 FLA_free( buff_u );
1555 FLA_free( buff_up );
1556 FLA_free( buff_v );
1557 FLA_free( buff_d );
1558 FLA_free( buff_e );
1559
1560 return FLA_SUCCESS;
1561}
FLA_Error FLA_Fused_UYx_ZVx_opc_var1(int m_U, int n_U, int m_V, int n_V, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_temp, int inc_temp, scomplex *buff_t, int inc_t, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w, scomplex *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:424
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition FLA_Househ2s_UT.c:589
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cconjv(int m, scomplex *x, int incx)
Definition bl1_conjv.c:23
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofd_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
633{
634 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
635 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
637
638 double alpha12;
639 double minus_conj_alpha12;
640 double psi11_minus_alpha12;
641 double minus_inv_tau11;
642 double beta;
643 double last_elem;
644 int i;
645
646 // b_alg = FLA_Obj_length( T );
647 int b_alg = m_TS;
648
649 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
650 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
651 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
652 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
653 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
654 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
655 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
656 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
657 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
658 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
659 double* buff_tmp = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
660 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
661 double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
662 double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
663 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
664 double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
665 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
666 double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
667 double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
668 int inc_tmp = 1;
669 int inc_w = 1;
670 int inc_al = 1;
671 int inc_ap = 1;
672 int inc_u = 1;
673 int inc_up = 1;
674 int inc_v = 1;
675 int inc_d = 1;
676 int inc_e = 1;
677
678 // FLA_Set( FLA_ZERO, Y );
679 // FLA_Set( FLA_ZERO, Z );
680 bl1_dsetm( n_A,
681 b_alg,
682 buff_0,
683 buff_Y, rs_Y, cs_Y );
684 bl1_dsetm( m_A,
685 b_alg,
686 buff_0,
687 buff_Z, rs_Z, cs_Z );
688
689 for ( i = 0; i < b_alg; ++i )
690 {
691 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
692 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
693 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
694 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
695 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
696 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
697 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
698 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
699
700 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
701 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
702 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
703
704 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
705 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
706 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
707
708 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
709 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
710
711 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
712 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
713
714 double* tmp21 = buff_tmp + (i+1)*inc_tmp;
715
716 double* w21 = buff_w + (i+1)*inc_w;
717
718 double* a22l = buff_al + (i+1)*inc_al;
719
720 double* a12p = buff_ap + (i+1)*inc_ap;
721
722 double* u21 = buff_u + (i+1)*inc_u;
723
724 double* u21p = buff_up + (i+1)*inc_up;
725
726 double* v21 = buff_v + (i+1)*inc_v;
727
728 double* d0 = buff_d + (0 )*inc_d;
729
730 double* e0 = buff_e + (0 )*inc_e;
731
732 double* a12p_t = a12p + (0 )*inc_ap;
733 double* a12p_b = a12p + (1 )*inc_ap;
734
735 double* v21_t = v21 + (0 )*inc_v;
736 double* v21_b = v21 + (1 )*inc_v;
737
738 double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
739
740 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
741 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
742
743 double* ABL = a10t;
744 double* ZBL = z10t;
745
746 double* a2 = alpha11;
747
748 int m_ahead = m_A - i - 1;
749 int n_ahead = n_A - i - 1;
750 int m_behind = i;
751 int n_behind = i;
752
753 /*------------------------------------------------------------*/
754
755 if ( m_behind > 0 )
756 {
757 // FLA_Copy( a01_b, last_elem );
758 // FLA_Set( FLA_ONE, a01_b );
759 last_elem = *a01_b;
760 *a01_b = *buff_1;
761 }
762
763 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
764 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
767 m_ahead + 1,
768 n_behind,
769 buff_m1,
770 ABL, rs_A, cs_A,
771 y10t, cs_Y,
772 buff_1,
773 a2, rs_A );
776 m_ahead + 1,
777 n_behind,
778 buff_m1,
779 ZBL, rs_Z, cs_Z,
780 a01, rs_A,
781 buff_1,
782 a2, rs_A );
783
784 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
785 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
788 n_ahead,
789 n_behind,
790 buff_m1,
791 Y20, rs_Y, cs_Y,
792 a10t, cs_A,
793 buff_1,
794 a12t, cs_A );
797 m_behind,
798 n_ahead,
799 buff_m1,
800 A02, rs_A, cs_A,
801 z10t, cs_Z,
802 buff_1,
803 a12t, cs_A );
804
805 if ( m_behind > 0 )
806 {
807 // FLA_Copy( last_elem, a01_b );
808 *a01_b = last_elem;
809 }
810
811 // FLA_Househ2_UT( FLA_LEFT,
812 // alpha11,
813 // a21, tau11 );
814 // FLA_Copy( a21, u21p );
816 alpha11,
817 a21, rs_A,
818 tau11 );
820 m_ahead,
821 a21, rs_A,
822 u21p, inc_up );
823
824 if ( n_ahead > 0 )
825 {
826 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
827 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
829
830 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
831 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
833 n_ahead,
834 a12t, cs_A,
835 a12p, inc_ap );
837 n_ahead,
839 a12t, cs_A,
840 a12p, inc_ap );
841
842 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
843 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
846 m_ahead,
847 n_behind,
848 buff_1,
849 A20, rs_A, cs_A,
850 u21p, inc_up,
851 buff_0,
852 d0, inc_d );
855 m_ahead,
856 n_behind,
857 buff_1,
858 Z20, rs_Z, cs_Z,
859 u21p, inc_up,
860 buff_0,
861 e0, inc_e );
862
863 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
864 // FLA_Axpy( FLA_ONE, d0, t01 );
866 n_behind,
867 a10t, cs_A,
868 t01, rs_T );
870 n_behind,
871 buff_1,
872 d0, inc_d,
873 t01, rs_T );
874
875 // FLA_Set( FLA_ZERO, y21 );
876 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
877 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
879 buff_0,
880 y21, rs_Y );
883 n_ahead,
884 n_behind,
885 buff_m1,
886 Y20, rs_Y, cs_Y,
887 d0, inc_d,
888 buff_1,
889 y21, rs_Y );
892 m_behind,
893 n_ahead,
894 buff_m1,
895 A02, rs_A, cs_A,
896 e0, inc_e,
897 buff_1,
898 y21, rs_Y );
899
900 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
901 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
902 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
904 n_ahead,
905 tau11,
906 buff_1,
907 A22, rs_A, cs_A,
908 u21p, inc_up,
909 a12p, inc_ap,
910 y21, rs_Y,
911 w21, inc_w );
912
913 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
914 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
915 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
916 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
917 // FLA_Copy( A22_l, a22l );
918 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
919 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
920 // FLA_Copy( g0, s01 );
922 n_behind,
923 m_behind,
924 n_ahead,
925 buff_m1,
926 A20, rs_A, cs_A,
927 Y20, rs_Y, cs_Y,
928 Z20, rs_Z, cs_Z,
929 A02, rs_A, cs_A,
930 A22, rs_A, cs_A,
931 tmp21, inc_tmp,
932 s01, rs_S,
933 a12p, inc_ap,
934 w21, inc_w,
935 a22l, inc_al );
936
937 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
939 n_ahead,
940 buff_1,
941 a12t, cs_A,
942 y21, rs_Y );
943
944 // FLA_Househ2s_UT( FLA_RIGHT,
945 // a12p_t,
946 // a12p_b,
947 // alpha12, psi11_minus_alpha12, sigma11 );
949 a12p_t,
950 a12p_b, inc_ap,
951 &alpha12,
953 sigma11 );
954
955 // FLA_Copy( a12p, v21 );
956 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
957 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
958 // FLA_Conjugate( v21_b );
960 n_ahead,
961 a12p, inc_ap,
962 v21, inc_v );
965 n_ahead,
967 v21, inc_v );
968 bl1_dconjv( n_ahead - 1,
969 v21_b, inc_v );
970
971 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
972 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
975
976 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
977 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
979 n_behind,
981 A02, rs_A,
982 s01, rs_S );
984 n_behind,
986 s01, rs_S );
987
988 // FLA_Copy( alpha12, a12t_l );
989 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
990 *a12t_l = alpha12;
992 n_ahead - 1,
993 v21_b, inc_v,
994 a12t_r, cs_A );
995 }
996
997 // FLA_Copy( u21p, u21 );
999 m_ahead,
1000 u21p, inc_up,
1001 u21, inc_u );
1002
1003 if ( n_ahead > 0 )
1004 {
1005 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1006 // FLA_Scal( FLA_MINUS_ONE, beta );
1007 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1009 n_ahead,
1010 y21, rs_Y,
1011 v21, inc_v,
1012 &beta );
1014
1015 // FLA_Copy( w21, z21 );
1016 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1017 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1018 // FLA_Axpy( beta, u21, z21 );
1020 m_ahead,
1021 w21, inc_w,
1022 z21, rs_Z );
1024 m_ahead,
1026 a22l, inc_al,
1027 z21, rs_Z );
1029 m_ahead,
1031 z21, rs_Z );
1033 m_ahead,
1034 &beta,
1035 u21, inc_u,
1036 z21, rs_Z );
1037
1038 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1039 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1041 n_ahead,
1042 tau11,
1043 y21, rs_Y );
1045 m_ahead,
1046 sigma11,
1047 z21, rs_Z );
1048 }
1049 else // if ( n_ahead == 0 )
1050 {
1051 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1052 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1054 n_behind,
1055 a10t, cs_A,
1056 t01, rs_T );
1059 m_ahead,
1060 n_behind,
1061 buff_1,
1062 A20, rs_A, cs_A,
1063 u21, inc_u,
1064 buff_1,
1065 t01, rs_T );
1066 }
1067
1068 /*------------------------------------------------------------*/
1069
1070 }
1071
1072 // FLA_Obj_free( &w );
1073 // FLA_Obj_free( &al );
1074 // FLA_Obj_free( &ap );
1075 // FLA_Obj_free( &u );
1076 // FLA_Obj_free( &up );
1077 // FLA_Obj_free( &v );
1078 // FLA_Obj_free( &d );
1079 // FLA_Obj_free( &e );
1080 FLA_free( buff_tmp );
1081 FLA_free( buff_w );
1082 FLA_free( buff_al );
1083 FLA_free( buff_ap );
1084 FLA_free( buff_u );
1085 FLA_free( buff_up );
1086 FLA_free( buff_v );
1087 FLA_free( buff_d );
1088 FLA_free( buff_e );
1089
1090 return FLA_SUCCESS;
1091}
FLA_Error FLA_Fused_UYx_ZVx_opd_var1(int m_U, int n_U, int m_V, int n_V, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_V, int rs_V, int cs_V, double *buff_A, int rs_A, int cs_A, double *buff_temp, int inc_temp, double *buff_t, int inc_t, double *buff_a, int inc_a, double *buff_w, int inc_w, double *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:331
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition FLA_Househ2s_UT.c:572
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dconjv(int m, double *x, int incx)
Definition bl1_conjv.c:18
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofs_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var4 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T,
float * buff_S,
int  rs_S,
int  cs_S 
)
163{
164 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
167
168 float alpha12;
169 float minus_conj_alpha12;
171 float minus_inv_tau11;
172 float beta;
173 float last_elem;
174 int i;
175
176 // b_alg = FLA_Obj_length( T );
177 int b_alg = m_TS;
178
179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189 float* buff_tmp = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
190 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191 float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
192 float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
193 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194 float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
195 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196 float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197 float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
198 int inc_tmp = 1;
199 int inc_w = 1;
200 int inc_al = 1;
201 int inc_ap = 1;
202 int inc_u = 1;
203 int inc_up = 1;
204 int inc_v = 1;
205 int inc_d = 1;
206 int inc_e = 1;
207
208 // FLA_Set( FLA_ZERO, Y );
209 // FLA_Set( FLA_ZERO, Z );
210 bl1_ssetm( n_A,
211 b_alg,
212 buff_0,
213 buff_Y, rs_Y, cs_Y );
214 bl1_ssetm( m_A,
215 b_alg,
216 buff_0,
217 buff_Z, rs_Z, cs_Z );
218
219 for ( i = 0; i < b_alg; ++i )
220 {
221 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
222 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
223 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
224 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
225 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
226 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
227 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
228 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
229
230 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
231 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
232 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
233
234 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
235 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
236 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
237
238 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
239 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
240
241 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
242 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
243
244 float* tmp21 = buff_tmp + (i+1)*inc_tmp;
245
246 float* w21 = buff_w + (i+1)*inc_w;
247
248 float* a22l = buff_al + (i+1)*inc_al;
249
250 float* a12p = buff_ap + (i+1)*inc_ap;
251
252 float* u21 = buff_u + (i+1)*inc_u;
253
254 float* u21p = buff_up + (i+1)*inc_up;
255
256 float* v21 = buff_v + (i+1)*inc_v;
257
258 float* d0 = buff_d + (0 )*inc_d;
259
260 float* e0 = buff_e + (0 )*inc_e;
261
262 float* a12p_t = a12p + (0 )*inc_ap;
263 float* a12p_b = a12p + (1 )*inc_ap;
264
265 float* v21_t = v21 + (0 )*inc_v;
266 float* v21_b = v21 + (1 )*inc_v;
267
268 float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
269
270 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
271 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
272
273 float* ABL = a10t;
274 float* ZBL = z10t;
275
276 float* a2 = alpha11;
277
278 int m_ahead = m_A - i - 1;
279 int n_ahead = n_A - i - 1;
280 int m_behind = i;
281 int n_behind = i;
282
283 /*------------------------------------------------------------*/
284
285 if ( m_behind > 0 )
286 {
287 // FLA_Copy( a01_b, last_elem );
288 // FLA_Set( FLA_ONE, a01_b );
289 last_elem = *a01_b;
290 *a01_b = *buff_1;
291 }
292
293 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
294 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
297 m_ahead + 1,
298 n_behind,
299 buff_m1,
300 ABL, rs_A, cs_A,
301 y10t, cs_Y,
302 buff_1,
303 a2, rs_A );
306 m_ahead + 1,
307 n_behind,
308 buff_m1,
309 ZBL, rs_Z, cs_Z,
310 a01, rs_A,
311 buff_1,
312 a2, rs_A );
313
314 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
315 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
318 n_ahead,
319 n_behind,
320 buff_m1,
321 Y20, rs_Y, cs_Y,
322 a10t, cs_A,
323 buff_1,
324 a12t, cs_A );
327 m_behind,
328 n_ahead,
329 buff_m1,
330 A02, rs_A, cs_A,
331 z10t, cs_Z,
332 buff_1,
333 a12t, cs_A );
334
335 if ( m_behind > 0 )
336 {
337 // FLA_Copy( last_elem, a01_b );
338 *a01_b = last_elem;
339 }
340
341 // FLA_Househ2_UT( FLA_LEFT,
342 // alpha11,
343 // a21, tau11 );
344 // FLA_Copy( a21, u21p );
346 alpha11,
347 a21, rs_A,
348 tau11 );
350 m_ahead,
351 a21, rs_A,
352 u21p, inc_up );
353
354 if ( n_ahead > 0 )
355 {
356 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
357 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
359
360 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
361 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
363 n_ahead,
364 a12t, cs_A,
365 a12p, inc_ap );
367 n_ahead,
369 a12t, cs_A,
370 a12p, inc_ap );
371
372 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
373 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
376 m_ahead,
377 n_behind,
378 buff_1,
379 A20, rs_A, cs_A,
380 u21p, inc_up,
381 buff_0,
382 d0, inc_d );
385 m_ahead,
386 n_behind,
387 buff_1,
388 Z20, rs_Z, cs_Z,
389 u21p, inc_up,
390 buff_0,
391 e0, inc_e );
392
393 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
394 // FLA_Axpy( FLA_ONE, d0, t01 );
396 n_behind,
397 a10t, cs_A,
398 t01, rs_T );
400 n_behind,
401 buff_1,
402 d0, inc_d,
403 t01, rs_T );
404
405 // FLA_Set( FLA_ZERO, y21 );
406 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
407 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
409 buff_0,
410 y21, rs_Y );
413 n_ahead,
414 n_behind,
415 buff_m1,
416 Y20, rs_Y, cs_Y,
417 d0, inc_d,
418 buff_1,
419 y21, rs_Y );
422 m_behind,
423 n_ahead,
424 buff_m1,
425 A02, rs_A, cs_A,
426 e0, inc_e,
427 buff_1,
428 y21, rs_Y );
429
430 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
431 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
432 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
434 n_ahead,
435 tau11,
436 buff_1,
437 A22, rs_A, cs_A,
438 u21p, inc_up,
439 a12p, inc_ap,
440 y21, rs_Y,
441 w21, inc_w );
442
443 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
444 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
445 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
446 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
447 // FLA_Copy( A22_l, a22l );
448 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
449 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
450 // FLA_Copy( g0, s01 );
452 n_behind,
453 m_behind,
454 n_ahead,
455 buff_m1,
456 A20, rs_A, cs_A,
457 Y20, rs_Y, cs_Y,
458 Z20, rs_Z, cs_Z,
459 A02, rs_A, cs_A,
460 A22, rs_A, cs_A,
461 tmp21, inc_tmp,
462 s01, rs_S,
463 a12p, inc_ap,
464 w21, inc_w,
465 a22l, inc_al );
466
467 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
469 n_ahead,
470 buff_1,
471 a12t, cs_A,
472 y21, rs_Y );
473
474 // FLA_Househ2s_UT( FLA_RIGHT,
475 // a12p_t,
476 // a12p_b,
477 // alpha12, psi11_minus_alpha12, sigma11 );
479 a12p_t,
480 a12p_b, inc_ap,
481 &alpha12,
483 sigma11 );
484
485 // FLA_Copy( a12p, v21 );
486 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
487 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
488 // FLA_Conjugate( v21_b );
490 n_ahead,
491 a12p, inc_ap,
492 v21, inc_v );
495 n_ahead,
497 v21, inc_v );
498 bl1_sconjv( n_ahead - 1,
499 v21_b, inc_v );
500
501 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
502 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
505
506 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
507 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
509 n_behind,
511 A02, rs_A,
512 s01, rs_S );
514 n_behind,
516 s01, rs_S );
517
518 // FLA_Copy( alpha12, a12t_l );
519 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
520 *a12t_l = alpha12;
522 n_ahead - 1,
523 v21_b, inc_v,
524 a12t_r, cs_A );
525 }
526
527 // FLA_Copy( u21p, u21 );
529 m_ahead,
530 u21p, inc_up,
531 u21, inc_u );
532
533 if ( n_ahead > 0 )
534 {
535 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
536 // FLA_Scal( FLA_MINUS_ONE, beta );
537 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
539 n_ahead,
540 y21, rs_Y,
541 v21, inc_v,
542 &beta );
544
545 // FLA_Copy( w21, z21 );
546 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
547 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
548 // FLA_Axpy( beta, u21, z21 );
550 m_ahead,
551 w21, inc_w,
552 z21, rs_Z );
554 m_ahead,
556 a22l, inc_al,
557 z21, rs_Z );
559 m_ahead,
561 z21, rs_Z );
563 m_ahead,
564 &beta,
565 u21, inc_u,
566 z21, rs_Z );
567
568 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
569 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
571 n_ahead,
572 tau11,
573 y21, rs_Y );
575 m_ahead,
576 sigma11,
577 z21, rs_Z );
578 }
579 else // if ( n_ahead == 0 )
580 {
581 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
582 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
584 n_behind,
585 a10t, cs_A,
586 t01, rs_T );
589 m_ahead,
590 n_behind,
591 buff_1,
592 A20, rs_A, cs_A,
593 u21, inc_u,
594 buff_1,
595 t01, rs_T );
596 }
597
598 /*------------------------------------------------------------*/
599
600 }
601
602 // FLA_Obj_free( &w );
603 // FLA_Obj_free( &al );
604 // FLA_Obj_free( &ap );
605 // FLA_Obj_free( &u );
606 // FLA_Obj_free( &up );
607 // FLA_Obj_free( &v );
608 // FLA_Obj_free( &d );
609 // FLA_Obj_free( &e );
610 FLA_free( buff_tmp );
611 FLA_free( buff_w );
612 FLA_free( buff_al );
613 FLA_free( buff_ap );
614 FLA_free( buff_u );
615 FLA_free( buff_up );
616 FLA_free( buff_v );
617 FLA_free( buff_d );
618 FLA_free( buff_e );
619
620 return FLA_SUCCESS;
621}
FLA_Error FLA_Fused_UYx_ZVx_ops_var1(int m_U, int n_U, int m_V, int n_V, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_V, int rs_V, int cs_V, float *buff_A, int rs_A, int cs_A, float *buff_temp, int inc_temp, float *buff_t, int inc_t, float *buff_a, int inc_a, float *buff_w, int inc_w, float *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:201
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition FLA_Househ2s_UT.c:555
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition bl1_conjv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofu_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)
36{
37 FLA_Datatype datatype;
38 int m_A, n_A, m_TS;
39 int rs_A, cs_A;
40 int rs_Y, cs_Y;
41 int rs_Z, cs_Z;
42 int rs_T, cs_T;
43 int rs_S, cs_S;
44
45 datatype = FLA_Obj_datatype( A );
46
47 m_A = FLA_Obj_length( A );
48 n_A = FLA_Obj_width( A );
50
53
56
59
62
65
66
67 switch ( datatype )
68 {
69 case FLA_FLOAT:
70 {
71 float* buff_A = FLA_FLOAT_PTR( A );
72 float* buff_Y = FLA_FLOAT_PTR( Y );
73 float* buff_Z = FLA_FLOAT_PTR( Z );
74 float* buff_T = FLA_FLOAT_PTR( T );
75 float* buff_S = FLA_FLOAT_PTR( S );
76
78 n_A,
79 m_TS,
84 buff_S, rs_S, cs_S );
85
86 break;
87 }
88
89 case FLA_DOUBLE:
90 {
91 double* buff_A = FLA_DOUBLE_PTR( A );
92 double* buff_Y = FLA_DOUBLE_PTR( Y );
93 double* buff_Z = FLA_DOUBLE_PTR( Z );
94 double* buff_T = FLA_DOUBLE_PTR( T );
95 double* buff_S = FLA_DOUBLE_PTR( S );
96
98 n_A,
99 m_TS,
100 buff_A, rs_A, cs_A,
101 buff_Y, rs_Y, cs_Y,
102 buff_Z, rs_Z, cs_Z,
103 buff_T, rs_T, cs_T,
104 buff_S, rs_S, cs_S );
105
106 break;
107 }
108
109 case FLA_COMPLEX:
110 {
116
118 n_A,
119 m_TS,
120 buff_A, rs_A, cs_A,
121 buff_Y, rs_Y, cs_Y,
122 buff_Z, rs_Z, cs_Z,
123 buff_T, rs_T, cs_T,
124 buff_S, rs_S, cs_S );
125
126 break;
127 }
128
130 {
136
138 n_A,
139 m_TS,
140 buff_A, rs_A, cs_A,
141 buff_Y, rs_Y, cs_Y,
142 buff_Z, rs_Z, cs_Z,
143 buff_T, rs_T, cs_T,
144 buff_S, rs_S, cs_S );
145
146 break;
147 }
148 }
149
150 return FLA_SUCCESS;
151}
FLA_Error FLA_Bidiag_UT_u_step_ofz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:1565
FLA_Error FLA_Bidiag_UT_u_step_ofs_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:155
FLA_Error FLA_Bidiag_UT_u_step_ofc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:1095
FLA_Error FLA_Bidiag_UT_u_step_ofd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var4.c:625
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
Definition blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blf_var4(), and FLA_Bidiag_UT_u_ofu_var4().

◆ FLA_Bidiag_UT_u_step_ofz_var4()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
1573{
1577
1582 dcomplex beta;
1584 int i;
1585
1586 // b_alg = FLA_Obj_length( T );
1587 int b_alg = m_TS;
1588
1589 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1590 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1591 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1592 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1593 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1594 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1595 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1596 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1597 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1598 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1599 dcomplex* buff_tmp = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1600 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1601 dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1602 dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1603 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1604 dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1605 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1606 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1607 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1608 int inc_tmp = 1;
1609 int inc_w = 1;
1610 int inc_al = 1;
1611 int inc_ap = 1;
1612 int inc_u = 1;
1613 int inc_up = 1;
1614 int inc_v = 1;
1615 int inc_d = 1;
1616 int inc_e = 1;
1617
1618 // FLA_Set( FLA_ZERO, Y );
1619 // FLA_Set( FLA_ZERO, Z );
1620 bl1_zsetm( n_A,
1621 b_alg,
1622 buff_0,
1623 buff_Y, rs_Y, cs_Y );
1624 bl1_zsetm( m_A,
1625 b_alg,
1626 buff_0,
1627 buff_Z, rs_Z, cs_Z );
1628
1629 for ( i = 0; i < b_alg; ++i )
1630 {
1631 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1632 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1633 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1634 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1635 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1636 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1637 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1638 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1639
1640 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1641 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1642 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1643
1644 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1645 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1646 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1647
1648 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1649 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1650
1651 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1652 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1653
1654 dcomplex* tmp21 = buff_tmp + (i+1)*inc_tmp;
1655
1656 dcomplex* w21 = buff_w + (i+1)*inc_w;
1657
1658 dcomplex* a22l = buff_al + (i+1)*inc_al;
1659
1660 dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1661
1662 dcomplex* u21 = buff_u + (i+1)*inc_u;
1663
1664 dcomplex* u21p = buff_up + (i+1)*inc_up;
1665
1666 dcomplex* v21 = buff_v + (i+1)*inc_v;
1667
1668 dcomplex* d0 = buff_d + (0 )*inc_d;
1669
1670 dcomplex* e0 = buff_e + (0 )*inc_e;
1671
1672 dcomplex* a12p_t = a12p + (0 )*inc_ap;
1673 dcomplex* a12p_b = a12p + (1 )*inc_ap;
1674
1675 dcomplex* v21_t = v21 + (0 )*inc_v;
1676 dcomplex* v21_b = v21 + (1 )*inc_v;
1677
1678 dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1679
1680 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1681 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1682
1683 dcomplex* ABL = a10t;
1684 dcomplex* ZBL = z10t;
1685
1686 dcomplex* a2 = alpha11;
1687
1688 int m_ahead = m_A - i - 1;
1689 int n_ahead = n_A - i - 1;
1690 int m_behind = i;
1691 int n_behind = i;
1692
1693 /*------------------------------------------------------------*/
1694
1695 if ( m_behind > 0 )
1696 {
1697 // FLA_Copy( a01_b, last_elem );
1698 // FLA_Set( FLA_ONE, a01_b );
1699 last_elem = *a01_b;
1700 *a01_b = *buff_1;
1701 }
1702
1703 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1704 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1707 m_ahead + 1,
1708 n_behind,
1709 buff_m1,
1710 ABL, rs_A, cs_A,
1711 y10t, cs_Y,
1712 buff_1,
1713 a2, rs_A );
1716 m_ahead + 1,
1717 n_behind,
1718 buff_m1,
1719 ZBL, rs_Z, cs_Z,
1720 a01, rs_A,
1721 buff_1,
1722 a2, rs_A );
1723
1724 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1725 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1728 n_ahead,
1729 n_behind,
1730 buff_m1,
1731 Y20, rs_Y, cs_Y,
1732 a10t, cs_A,
1733 buff_1,
1734 a12t, cs_A );
1737 m_behind,
1738 n_ahead,
1739 buff_m1,
1740 A02, rs_A, cs_A,
1741 z10t, cs_Z,
1742 buff_1,
1743 a12t, cs_A );
1744
1745 if ( m_behind > 0 )
1746 {
1747 // FLA_Copy( last_elem, a01_b );
1748 *a01_b = last_elem;
1749 }
1750
1751 // FLA_Househ2_UT( FLA_LEFT,
1752 // alpha11,
1753 // a21, tau11 );
1754 // FLA_Copy( a21, u21p );
1756 alpha11,
1757 a21, rs_A,
1758 tau11 );
1760 m_ahead,
1761 a21, rs_A,
1762 u21p, inc_up );
1763
1764 if ( n_ahead > 0 )
1765 {
1766 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1767 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1769
1770 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1771 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1773 n_ahead,
1774 a12t, cs_A,
1775 a12p, inc_ap );
1777 n_ahead,
1779 a12t, cs_A,
1780 a12p, inc_ap );
1781
1782 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1783 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1786 m_ahead,
1787 n_behind,
1788 buff_1,
1789 A20, rs_A, cs_A,
1790 u21p, inc_up,
1791 buff_0,
1792 d0, inc_d );
1795 m_ahead,
1796 n_behind,
1797 buff_1,
1798 Z20, rs_Z, cs_Z,
1799 u21p, inc_up,
1800 buff_0,
1801 e0, inc_e );
1802
1803 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1804 // FLA_Axpy( FLA_ONE, d0, t01 );
1806 n_behind,
1807 a10t, cs_A,
1808 t01, rs_T );
1810 n_behind,
1811 buff_1,
1812 d0, inc_d,
1813 t01, rs_T );
1814
1815 // FLA_Set( FLA_ZERO, y21 );
1816 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1817 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1819 buff_0,
1820 y21, rs_Y );
1823 n_ahead,
1824 n_behind,
1825 buff_m1,
1826 Y20, rs_Y, cs_Y,
1827 d0, inc_d,
1828 buff_1,
1829 y21, rs_Y );
1832 m_behind,
1833 n_ahead,
1834 buff_m1,
1835 A02, rs_A, cs_A,
1836 e0, inc_e,
1837 buff_1,
1838 y21, rs_Y );
1839
1840 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1841 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1842 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1844 n_ahead,
1845 tau11,
1846 buff_1,
1847 A22, rs_A, cs_A,
1848 u21p, inc_up,
1849 a12p, inc_ap,
1850 y21, rs_Y,
1851 w21, inc_w );
1852
1853 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1854 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1855 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1856 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1857 // FLA_Copy( A22_l, a22l );
1858 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1859 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1860 // FLA_Copy( g0, s01 );
1862 n_behind,
1863 m_behind,
1864 n_ahead,
1865 buff_m1,
1866 A20, rs_A, cs_A,
1867 Y20, rs_Y, cs_Y,
1868 Z20, rs_Z, cs_Z,
1869 A02, rs_A, cs_A,
1870 A22, rs_A, cs_A,
1871 tmp21, inc_tmp,
1872 s01, rs_S,
1873 a12p, inc_ap,
1874 w21, inc_w,
1875 a22l, inc_al );
1876
1877 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1879 n_ahead,
1880 buff_1,
1881 a12t, cs_A,
1882 y21, rs_Y );
1883
1884 // FLA_Househ2s_UT( FLA_RIGHT,
1885 // a12p_t,
1886 // a12p_b,
1887 // alpha12, psi11_minus_alpha12, sigma11 );
1889 a12p_t,
1890 a12p_b, inc_ap,
1891 &alpha12,
1893 sigma11 );
1894
1895 // FLA_Copy( a12p, v21 );
1896 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1897 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1898 // FLA_Conjugate( v21_b );
1900 n_ahead,
1901 a12p, inc_ap,
1902 v21, inc_v );
1905 n_ahead,
1907 v21, inc_v );
1908 bl1_zconjv( n_ahead - 1,
1909 v21_b, inc_v );
1910
1911 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1912 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1915
1916 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1917 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1919 n_behind,
1921 A02, rs_A,
1922 s01, rs_S );
1924 n_behind,
1926 s01, rs_S );
1927
1928 // FLA_Copy( alpha12, a12t_l );
1929 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1930 *a12t_l = alpha12;
1932 n_ahead - 1,
1933 v21_b, inc_v,
1934 a12t_r, cs_A );
1935 }
1936
1937 // FLA_Copy( u21p, u21 );
1939 m_ahead,
1940 u21p, inc_up,
1941 u21, inc_u );
1942
1943 if ( n_ahead > 0 )
1944 {
1945 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1946 // FLA_Scal( FLA_MINUS_ONE, beta );
1947 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1949 n_ahead,
1950 y21, rs_Y,
1951 v21, inc_v,
1952 &beta );
1954
1955 // FLA_Copy( w21, z21 );
1956 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1957 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1958 // FLA_Axpy( beta, u21, z21 );
1960 m_ahead,
1961 w21, inc_w,
1962 z21, rs_Z );
1964 m_ahead,
1966 a22l, inc_al,
1967 z21, rs_Z );
1969 m_ahead,
1971 z21, rs_Z );
1973 m_ahead,
1974 &beta,
1975 u21, inc_u,
1976 z21, rs_Z );
1977
1978 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1979 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1981 n_ahead,
1982 tau11,
1983 y21, rs_Y );
1985 m_ahead,
1986 sigma11,
1987 z21, rs_Z );
1988 }
1989 else // if ( n_ahead == 0 )
1990 {
1991 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1992 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1994 n_behind,
1995 a10t, cs_A,
1996 t01, rs_T );
1999 m_ahead,
2000 n_behind,
2001 buff_1,
2002 A20, rs_A, cs_A,
2003 u21, inc_u,
2004 buff_1,
2005 t01, rs_T );
2006 }
2007
2008 /*------------------------------------------------------------*/
2009
2010 }
2011
2012 // FLA_Obj_free( &w );
2013 // FLA_Obj_free( &al );
2014 // FLA_Obj_free( &ap );
2015 // FLA_Obj_free( &u );
2016 // FLA_Obj_free( &up );
2017 // FLA_Obj_free( &v );
2018 // FLA_Obj_free( &d );
2019 // FLA_Obj_free( &e );
2020 FLA_free( buff_tmp );
2021 FLA_free( buff_w );
2022 FLA_free( buff_al );
2023 FLA_free( buff_ap );
2024 FLA_free( buff_u );
2025 FLA_free( buff_up );
2026 FLA_free( buff_v );
2027 FLA_free( buff_d );
2028 FLA_free( buff_e );
2029
2030 return FLA_SUCCESS;
2031}
FLA_Error FLA_Fused_UYx_ZVx_opz_var1(int m_U, int n_U, int m_V, int n_V, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_temp, int inc_temp, dcomplex *buff_t, int inc_t, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w, dcomplex *buff_al, int inc_al)
Definition FLA_Fused_UYx_ZVx_opt_var1.c:542
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition FLA_Househ2s_UT.c:610
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition bl1_conjv.c:34
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var4().