libflame revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var4.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var4()

FLA_Error FLA_Bidiag_UT_u_opt_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
16 FLA_Obj Y, Z;
18 dim_t m_A, n_A;
19
21 m_A = FLA_Obj_length( A );
22 n_A = FLA_Obj_width( A );
23
24 FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25 FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26
28
29 FLA_Obj_free( &Y );
30 FLA_Obj_free( &Z );
31
32 return r_val;
33}
FLA_Error FLA_Bidiag_UT_u_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var4.c:35
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition FLA_Obj.c:55
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Error
Definition FLA_type_defs.h:47
int FLA_Datatype
Definition FLA_type_defs.h:49
unsigned long dim_t
Definition FLA_type_defs.h:71
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_step_opc_var4()

FLA_Error FLA_Bidiag_UT_u_step_opc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
1259{
1263
1268 scomplex beta;
1270 int i;
1271
1272 // b_alg = FLA_Obj_length( T );
1273 int b_alg = m_TS;
1274
1275 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1276 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1277 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1278 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1279 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1280 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1281 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1282 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1283 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1284 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1285 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1286 scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1287 scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1288 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1289 scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1290 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1291 scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1292 scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1293 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1294 scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1295 int inc_w = 1;
1296 int inc_al = 1;
1297 int inc_ap = 1;
1298 int inc_u = 1;
1299 int inc_up = 1;
1300 int inc_v = 1;
1301 int inc_d = 1;
1302 int inc_e = 1;
1303 int inc_f = 1;
1304 int inc_g = 1;
1305
1306 // FLA_Set( FLA_ZERO, Y );
1307 // FLA_Set( FLA_ZERO, Z );
1308 bl1_csetm( n_A,
1309 b_alg,
1310 buff_0,
1311 buff_Y, rs_Y, cs_Y );
1312 bl1_csetm( m_A,
1313 b_alg,
1314 buff_0,
1315 buff_Z, rs_Z, cs_Z );
1316
1317 for ( i = 0; i < b_alg; ++i )
1318 {
1319 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1320 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1321 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1322 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1323 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1324 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1325 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1326 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1327
1328 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1329 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1330 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1331
1332 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1333 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1334 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1335
1336 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1337 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1338
1339 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1340 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1341
1342 scomplex* w21 = buff_w + (i+1)*inc_w;
1343
1344 scomplex* a22l = buff_al + (i+1)*inc_al;
1345
1346 scomplex* a12p = buff_ap + (i+1)*inc_ap;
1347
1348 scomplex* u21 = buff_u + (i+1)*inc_u;
1349
1350 scomplex* u21p = buff_up + (i+1)*inc_up;
1351
1352 scomplex* v21 = buff_v + (i+1)*inc_v;
1353
1354 scomplex* d0 = buff_d + (0 )*inc_d;
1355
1356 scomplex* e0 = buff_e + (0 )*inc_e;
1357
1358 scomplex* f0 = buff_f + (0 )*inc_f;
1359
1360 scomplex* g0 = buff_g + (0 )*inc_g;
1361
1362 scomplex* a12p_t = a12p + (0 )*inc_ap;
1363 scomplex* a12p_b = a12p + (1 )*inc_ap;
1364
1365 scomplex* v21_t = v21 + (0 )*inc_v;
1366 scomplex* v21_b = v21 + (1 )*inc_v;
1367
1368 scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1369
1370 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1371 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1372
1373 scomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1374
1375 scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1376
1377 scomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1378
1379 scomplex* ABL = a10t;
1380 scomplex* ZBL = z10t;
1381
1382 scomplex* a2 = alpha11;
1383
1384 int m_ahead = m_A - i - 1;
1385 int n_ahead = n_A - i - 1;
1386 int m_behind = i;
1387 int n_behind = i;
1388
1389 /*------------------------------------------------------------*/
1390
1391 if ( m_behind > 0 )
1392 {
1393 // FLA_Copy( a01_b, last_elem );
1394 // FLA_Set( FLA_ONE, a01_b );
1395 last_elem = *a01_b;
1396 *a01_b = *buff_1;
1397 }
1398
1399 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1400 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1403 m_ahead + 1,
1404 n_behind,
1405 buff_m1,
1406 ABL, rs_A, cs_A,
1407 y10t, cs_Y,
1408 buff_1,
1409 a2, rs_A );
1412 m_ahead + 1,
1413 n_behind,
1414 buff_m1,
1415 ZBL, rs_Z, cs_Z,
1416 a01, rs_A,
1417 buff_1,
1418 a2, rs_A );
1419
1420 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1421 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1424 n_ahead,
1425 n_behind,
1426 buff_m1,
1427 Y20, rs_Y, cs_Y,
1428 a10t, cs_A,
1429 buff_1,
1430 a12t, cs_A );
1433 m_behind,
1434 n_ahead,
1435 buff_m1,
1436 A02, rs_A, cs_A,
1437 z10t, cs_Z,
1438 buff_1,
1439 a12t, cs_A );
1440
1441 if ( m_behind > 0 )
1442 {
1443 // FLA_Copy( last_elem, a01_b );
1444 *a01_b = last_elem;
1445 }
1446
1447 // FLA_Househ2_UT( FLA_LEFT,
1448 // alpha11,
1449 // a21, tau11 );
1450 // FLA_Copy( a21, u21p );
1452 alpha11,
1453 a21, rs_A,
1454 tau11 );
1456 m_ahead,
1457 a21, rs_A,
1458 u21p, inc_up );
1459
1460 if ( n_ahead > 0 )
1461 {
1462 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1463 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1465
1466 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1467 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1469 n_ahead,
1470 a12t, cs_A,
1471 a12p, inc_ap );
1473 n_ahead,
1475 a12t, cs_A,
1476 a12p, inc_ap );
1477
1478 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1479 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1482 m_ahead,
1483 n_behind,
1484 buff_1,
1485 A20, rs_A, cs_A,
1486 u21p, inc_up,
1487 buff_0,
1488 d0, inc_d );
1491 m_ahead,
1492 n_behind,
1493 buff_1,
1494 Z20, rs_Z, cs_Z,
1495 u21p, inc_up,
1496 buff_0,
1497 e0, inc_e );
1498
1499 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1500 // FLA_Axpy( FLA_ONE, d0, t01 );
1502 n_behind,
1503 a10t, cs_A,
1504 t01, rs_T );
1506 n_behind,
1507 buff_1,
1508 d0, inc_d,
1509 t01, rs_T );
1510
1511 // FLA_Set( FLA_ZERO, y21 );
1512 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1513 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1515 buff_0,
1516 y21, rs_Y );
1519 n_ahead,
1520 n_behind,
1521 buff_m1,
1522 Y20, rs_Y, cs_Y,
1523 d0, inc_d,
1524 buff_1,
1525 y21, rs_Y );
1528 m_behind,
1529 n_ahead,
1530 buff_m1,
1531 A02, rs_A, cs_A,
1532 e0, inc_e,
1533 buff_1,
1534 y21, rs_Y );
1535
1536 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1539 m_ahead,
1540 n_ahead,
1541 buff_1,
1542 A22, rs_A, cs_A,
1543 u21p, inc_up,
1544 buff_1,
1545 y21, rs_Y );
1546
1547 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1549 n_ahead,
1551 y21, rs_Y,
1552 a12p, inc_ap );
1553
1554 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1557 m_ahead,
1558 n_ahead,
1559 buff_1,
1560 A22, rs_A, cs_A,
1561 a12p, inc_ap,
1562 buff_0,
1563 w21, inc_w );
1564
1565 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1566 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1569 n_ahead,
1570 n_behind,
1571 buff_1,
1572 Y20, rs_Y, cs_Y,
1573 a12p, inc_ap,
1574 buff_0,
1575 f0, inc_f );
1578 m_behind,
1579 n_ahead,
1580 buff_1,
1581 A02, rs_A, cs_A,
1582 a12p, inc_ap,
1583 buff_0,
1584 g0, inc_g );
1585
1586 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1587 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1590 m_ahead,
1591 n_behind,
1592 buff_m1,
1593 A20, rs_A, cs_A,
1594 f0, inc_f,
1595 buff_1,
1596 w21, inc_w );
1599 m_ahead,
1600 n_behind,
1601 buff_m1,
1602 Z20, rs_Z, cs_Z,
1603 g0, inc_g,
1604 buff_1,
1605 w21, inc_w );
1606
1607 // FLA_Copy( A22_l, a22l );
1608 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1609 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1611 m_ahead,
1612 A22_l, rs_A,
1613 a22l, inc_al );
1616 m_ahead,
1617 n_behind,
1618 buff_m1,
1619 A20, rs_A, cs_A,
1620 Y20_t, cs_Y,
1621 buff_1,
1622 a22l, inc_al );
1625 m_ahead,
1626 n_behind,
1627 buff_m1,
1628 Z20, rs_Z, cs_Z,
1629 A02_l, rs_A,
1630 buff_1,
1631 a22l, inc_al );
1632
1633 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1635 n_ahead,
1636 buff_1,
1637 a12t, cs_A,
1638 y21, rs_Y );
1639
1640 // FLA_Househ2s_UT( FLA_RIGHT,
1641 // a12p_t,
1642 // a12p_b,
1643 // alpha12, psi11_minus_alpha12, sigma11 );
1645 a12p_t,
1646 a12p_b, inc_ap,
1647 &alpha12,
1649 sigma11 );
1650
1651 // FLA_Copy( a12p, v21 );
1652 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1653 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1654 // FLA_Conjugate( v21_b );
1656 n_ahead,
1657 a12p, inc_ap,
1658 v21, inc_v );
1661 n_ahead,
1663 v21, inc_v );
1664 bl1_cconjv( n_ahead - 1,
1665 v21_b, inc_v );
1666
1667 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1668 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1671
1672 // FLA_Copy( g0, s01 );
1673 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1674 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1676 n_behind,
1677 g0, inc_g,
1678 s01, rs_S );
1680 n_behind,
1682 A02_l, rs_A,
1683 s01, rs_S );
1685 n_behind,
1687 s01, rs_S );
1688
1689 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1690 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1691 *a12t_l = alpha12;
1693 n_ahead - 1,
1694 v21_b, inc_v,
1695 a12t_r, cs_A );
1696 }
1697
1698 // FLA_Copy( u21p, u21 );
1700 m_ahead,
1701 u21p, inc_up,
1702 u21, inc_u );
1703
1704 if ( n_ahead > 0 )
1705 {
1706 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1707 // FLA_Scal( FLA_MINUS_ONE, beta );
1708 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1710 n_ahead,
1711 y21, rs_Y,
1712 v21, inc_v,
1713 &beta );
1715
1716 // FLA_Copy( w21, z21 );
1717 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1718 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1719 // FLA_Axpy( beta, u21, z21 );
1721 m_ahead,
1722 w21, inc_w,
1723 z21, rs_Z );
1725 m_ahead,
1727 a22l, inc_al,
1728 z21, rs_Z );
1730 m_ahead,
1732 z21, rs_Z );
1734 m_ahead,
1735 &beta,
1736 u21, inc_u,
1737 z21, rs_Z );
1738
1739 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1740 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1742 n_ahead,
1743 tau11,
1744 y21, rs_Y );
1746 m_ahead,
1747 sigma11,
1748 z21, rs_Z );
1749 }
1750 else // if ( n_ahead == 0 )
1751 {
1752 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1753 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1755 n_behind,
1756 a10t, cs_A,
1757 t01, rs_T );
1760 m_ahead,
1761 n_behind,
1762 buff_1,
1763 A20, rs_A, cs_A,
1764 u21, inc_u,
1765 buff_1,
1766 t01, rs_T );
1767 }
1768
1769 /*------------------------------------------------------------*/
1770
1771 }
1772
1773 // FLA_Obj_free( &w );
1774 // FLA_Obj_free( &al );
1775 // FLA_Obj_free( &ap );
1776 // FLA_Obj_free( &u );
1777 // FLA_Obj_free( &up );
1778 // FLA_Obj_free( &v );
1779 // FLA_Obj_free( &d );
1780 // FLA_Obj_free( &e );
1781 // FLA_Obj_free( &f );
1782 // FLA_Obj_free( &g );
1783 FLA_free( buff_w );
1784 FLA_free( buff_al );
1785 FLA_free( buff_ap );
1786 FLA_free( buff_u );
1787 FLA_free( buff_up );
1788 FLA_free( buff_v );
1789 FLA_free( buff_d );
1790 FLA_free( buff_e );
1791 FLA_free( buff_f );
1792 FLA_free( buff_g );
1793
1794 return FLA_SUCCESS;
1795}
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition FLA_Househ2s_UT.c:589
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cconjv(int m, scomplex *x, int incx)
Definition bl1_conjv.c:23
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_opd_var4()

FLA_Error FLA_Bidiag_UT_u_step_opd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
711{
712 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
713 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
715
716 double alpha12;
717 double minus_conj_alpha12;
718 double psi11_minus_alpha12;
719 double minus_inv_tau11;
720 double beta;
721 double last_elem;
722 int i;
723
724 // b_alg = FLA_Obj_length( T );
725 int b_alg = m_TS;
726
727 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
728 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
729 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
730 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
731 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
732 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
733 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
734 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
735 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
736 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
737 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
738 double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
739 double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
740 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
741 double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
742 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
743 double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
744 double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
745 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
746 double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
747 int inc_w = 1;
748 int inc_al = 1;
749 int inc_ap = 1;
750 int inc_u = 1;
751 int inc_up = 1;
752 int inc_v = 1;
753 int inc_d = 1;
754 int inc_e = 1;
755 int inc_f = 1;
756 int inc_g = 1;
757
758 // FLA_Set( FLA_ZERO, Y );
759 // FLA_Set( FLA_ZERO, Z );
760 bl1_dsetm( n_A,
761 b_alg,
762 buff_0,
763 buff_Y, rs_Y, cs_Y );
764 bl1_dsetm( m_A,
765 b_alg,
766 buff_0,
767 buff_Z, rs_Z, cs_Z );
768
769 for ( i = 0; i < b_alg; ++i )
770 {
771 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
772 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
773 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
774 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
775 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
776 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
777 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
778 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
779
780 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
781 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
782 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
783
784 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
785 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
786 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
787
788 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
789 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
790
791 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
792 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
793
794 double* w21 = buff_w + (i+1)*inc_w;
795
796 double* a22l = buff_al + (i+1)*inc_al;
797
798 double* a12p = buff_ap + (i+1)*inc_ap;
799
800 double* u21 = buff_u + (i+1)*inc_u;
801
802 double* u21p = buff_up + (i+1)*inc_up;
803
804 double* v21 = buff_v + (i+1)*inc_v;
805
806 double* d0 = buff_d + (0 )*inc_d;
807
808 double* e0 = buff_e + (0 )*inc_e;
809
810 double* f0 = buff_f + (0 )*inc_f;
811
812 double* g0 = buff_g + (0 )*inc_g;
813
814 double* a12p_t = a12p + (0 )*inc_ap;
815 double* a12p_b = a12p + (1 )*inc_ap;
816
817 double* v21_t = v21 + (0 )*inc_v;
818 double* v21_b = v21 + (1 )*inc_v;
819
820 double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
821
822 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
823 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
824
825 double* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
826
827 double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
828
829 double* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
830
831 double* ABL = a10t;
832 double* ZBL = z10t;
833
834 double* a2 = alpha11;
835
836 int m_ahead = m_A - i - 1;
837 int n_ahead = n_A - i - 1;
838 int m_behind = i;
839 int n_behind = i;
840
841 /*------------------------------------------------------------*/
842
843 if ( m_behind > 0 )
844 {
845 // FLA_Copy( a01_b, last_elem );
846 // FLA_Set( FLA_ONE, a01_b );
847 last_elem = *a01_b;
848 *a01_b = *buff_1;
849 }
850
851 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
852 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
855 m_ahead + 1,
856 n_behind,
857 buff_m1,
858 ABL, rs_A, cs_A,
859 y10t, cs_Y,
860 buff_1,
861 a2, rs_A );
864 m_ahead + 1,
865 n_behind,
866 buff_m1,
867 ZBL, rs_Z, cs_Z,
868 a01, rs_A,
869 buff_1,
870 a2, rs_A );
871
872 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
873 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
876 n_ahead,
877 n_behind,
878 buff_m1,
879 Y20, rs_Y, cs_Y,
880 a10t, cs_A,
881 buff_1,
882 a12t, cs_A );
885 m_behind,
886 n_ahead,
887 buff_m1,
888 A02, rs_A, cs_A,
889 z10t, cs_Z,
890 buff_1,
891 a12t, cs_A );
892
893 if ( m_behind > 0 )
894 {
895 // FLA_Copy( last_elem, a01_b );
896 *a01_b = last_elem;
897 }
898
899 // FLA_Househ2_UT( FLA_LEFT,
900 // alpha11,
901 // a21, tau11 );
902 // FLA_Copy( a21, u21p );
904 alpha11,
905 a21, rs_A,
906 tau11 );
908 m_ahead,
909 a21, rs_A,
910 u21p, inc_up );
911
912 if ( n_ahead > 0 )
913 {
914 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
915 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
917
918 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
919 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
921 n_ahead,
922 a12t, cs_A,
923 a12p, inc_ap );
925 n_ahead,
927 a12t, cs_A,
928 a12p, inc_ap );
929
930 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
931 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
934 m_ahead,
935 n_behind,
936 buff_1,
937 A20, rs_A, cs_A,
938 u21p, inc_up,
939 buff_0,
940 d0, inc_d );
943 m_ahead,
944 n_behind,
945 buff_1,
946 Z20, rs_Z, cs_Z,
947 u21p, inc_up,
948 buff_0,
949 e0, inc_e );
950
951 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
952 // FLA_Axpy( FLA_ONE, d0, t01 );
954 n_behind,
955 a10t, cs_A,
956 t01, rs_T );
958 n_behind,
959 buff_1,
960 d0, inc_d,
961 t01, rs_T );
962
963 // FLA_Set( FLA_ZERO, y21 );
964 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
965 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
967 buff_0,
968 y21, rs_Y );
971 n_ahead,
972 n_behind,
973 buff_m1,
974 Y20, rs_Y, cs_Y,
975 d0, inc_d,
976 buff_1,
977 y21, rs_Y );
980 m_behind,
981 n_ahead,
982 buff_m1,
983 A02, rs_A, cs_A,
984 e0, inc_e,
985 buff_1,
986 y21, rs_Y );
987
988 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
991 m_ahead,
992 n_ahead,
993 buff_1,
994 A22, rs_A, cs_A,
995 u21p, inc_up,
996 buff_1,
997 y21, rs_Y );
998
999 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1001 n_ahead,
1003 y21, rs_Y,
1004 a12p, inc_ap );
1005
1006 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1009 m_ahead,
1010 n_ahead,
1011 buff_1,
1012 A22, rs_A, cs_A,
1013 a12p, inc_ap,
1014 buff_0,
1015 w21, inc_w );
1016
1017 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1018 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1021 n_ahead,
1022 n_behind,
1023 buff_1,
1024 Y20, rs_Y, cs_Y,
1025 a12p, inc_ap,
1026 buff_0,
1027 f0, inc_f );
1030 m_behind,
1031 n_ahead,
1032 buff_1,
1033 A02, rs_A, cs_A,
1034 a12p, inc_ap,
1035 buff_0,
1036 g0, inc_g );
1037
1038 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1039 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1042 m_ahead,
1043 n_behind,
1044 buff_m1,
1045 A20, rs_A, cs_A,
1046 f0, inc_f,
1047 buff_1,
1048 w21, inc_w );
1051 m_ahead,
1052 n_behind,
1053 buff_m1,
1054 Z20, rs_Z, cs_Z,
1055 g0, inc_g,
1056 buff_1,
1057 w21, inc_w );
1058
1059 // FLA_Copy( A22_l, a22l );
1060 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1061 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1063 m_ahead,
1064 A22_l, rs_A,
1065 a22l, inc_al );
1068 m_ahead,
1069 n_behind,
1070 buff_m1,
1071 A20, rs_A, cs_A,
1072 Y20_t, cs_Y,
1073 buff_1,
1074 a22l, inc_al );
1077 m_ahead,
1078 n_behind,
1079 buff_m1,
1080 Z20, rs_Z, cs_Z,
1081 A02_l, rs_A,
1082 buff_1,
1083 a22l, inc_al );
1084
1085 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1087 n_ahead,
1088 buff_1,
1089 a12t, cs_A,
1090 y21, rs_Y );
1091
1092 // FLA_Househ2s_UT( FLA_RIGHT,
1093 // a12p_t,
1094 // a12p_b,
1095 // alpha12, psi11_minus_alpha12, sigma11 );
1097 a12p_t,
1098 a12p_b, inc_ap,
1099 &alpha12,
1101 sigma11 );
1102
1103 // FLA_Copy( a12p, v21 );
1104 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1105 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1106 // FLA_Conjugate( v21_b );
1108 n_ahead,
1109 a12p, inc_ap,
1110 v21, inc_v );
1113 n_ahead,
1115 v21, inc_v );
1116 bl1_dconjv( n_ahead - 1,
1117 v21_b, inc_v );
1118
1119 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1120 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1123
1124 // FLA_Copy( g0, s01 );
1125 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1126 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1128 n_behind,
1129 g0, inc_g,
1130 s01, rs_S );
1132 n_behind,
1134 A02_l, rs_A,
1135 s01, rs_S );
1137 n_behind,
1139 s01, rs_S );
1140
1141 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1142 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1143 *a12t_l = alpha12;
1145 n_ahead - 1,
1146 v21_b, inc_v,
1147 a12t_r, cs_A );
1148 }
1149
1150 // FLA_Copy( u21p, u21 );
1152 m_ahead,
1153 u21p, inc_up,
1154 u21, inc_u );
1155
1156 if ( n_ahead > 0 )
1157 {
1158 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1159 // FLA_Scal( FLA_MINUS_ONE, beta );
1160 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1162 n_ahead,
1163 y21, rs_Y,
1164 v21, inc_v,
1165 &beta );
1167
1168 // FLA_Copy( w21, z21 );
1169 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1170 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1171 // FLA_Axpy( beta, u21, z21 );
1173 m_ahead,
1174 w21, inc_w,
1175 z21, rs_Z );
1177 m_ahead,
1179 a22l, inc_al,
1180 z21, rs_Z );
1182 m_ahead,
1184 z21, rs_Z );
1186 m_ahead,
1187 &beta,
1188 u21, inc_u,
1189 z21, rs_Z );
1190
1191 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1192 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1194 n_ahead,
1195 tau11,
1196 y21, rs_Y );
1198 m_ahead,
1199 sigma11,
1200 z21, rs_Z );
1201 }
1202 else // if ( n_ahead == 0 )
1203 {
1204 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1205 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1207 n_behind,
1208 a10t, cs_A,
1209 t01, rs_T );
1212 m_ahead,
1213 n_behind,
1214 buff_1,
1215 A20, rs_A, cs_A,
1216 u21, inc_u,
1217 buff_1,
1218 t01, rs_T );
1219 }
1220
1221 /*------------------------------------------------------------*/
1222
1223 }
1224
1225 // FLA_Obj_free( &w );
1226 // FLA_Obj_free( &al );
1227 // FLA_Obj_free( &ap );
1228 // FLA_Obj_free( &u );
1229 // FLA_Obj_free( &up );
1230 // FLA_Obj_free( &v );
1231 // FLA_Obj_free( &d );
1232 // FLA_Obj_free( &e );
1233 // FLA_Obj_free( &f );
1234 // FLA_Obj_free( &g );
1235 FLA_free( buff_w );
1236 FLA_free( buff_al );
1237 FLA_free( buff_ap );
1238 FLA_free( buff_u );
1239 FLA_free( buff_up );
1240 FLA_free( buff_v );
1241 FLA_free( buff_d );
1242 FLA_free( buff_e );
1243 FLA_free( buff_f );
1244 FLA_free( buff_g );
1245
1246 return FLA_SUCCESS;
1247}
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition FLA_Househ2s_UT.c:572
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dconjv(int m, double *x, int incx)
Definition bl1_conjv.c:18
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_ops_var4()

FLA_Error FLA_Bidiag_UT_u_step_ops_var4 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
163{
164 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167
168 float alpha12;
169 float minus_conj_alpha12;
170 float psi11_minus_alpha12;
171 float minus_inv_tau11;
172 float beta;
173 float last_elem;
174 int i;
175
176 // b_alg = FLA_Obj_length( T );
177 int b_alg = m_TS;
178
179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
190 float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191 float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
192 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
193 float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
195 float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196 float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
198 float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
199 int inc_w = 1;
200 int inc_al = 1;
201 int inc_ap = 1;
202 int inc_u = 1;
203 int inc_up = 1;
204 int inc_v = 1;
205 int inc_d = 1;
206 int inc_e = 1;
207 int inc_f = 1;
208 int inc_g = 1;
209
210 // FLA_Set( FLA_ZERO, Y );
211 // FLA_Set( FLA_ZERO, Z );
212 bl1_ssetm( n_A,
213 b_alg,
214 buff_0,
215 buff_Y, rs_Y, cs_Y );
216 bl1_ssetm( m_A,
217 b_alg,
218 buff_0,
219 buff_Z, rs_Z, cs_Z );
220
221 for ( i = 0; i < b_alg; ++i )
222 {
223 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
224 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
225 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
226 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
227 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
228 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
229 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
230 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
231
232 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
233 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
234 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
235
236 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
237 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
238 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
239
240 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
241 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
242
243 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
244 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
245
246 float* w21 = buff_w + (i+1)*inc_w;
247
248 float* a22l = buff_al + (i+1)*inc_al;
249
250 float* a12p = buff_ap + (i+1)*inc_ap;
251
252 float* u21 = buff_u + (i+1)*inc_u;
253
254 float* u21p = buff_up + (i+1)*inc_up;
255
256 float* v21 = buff_v + (i+1)*inc_v;
257
258 float* d0 = buff_d + (0 )*inc_d;
259
260 float* e0 = buff_e + (0 )*inc_e;
261
262 float* f0 = buff_f + (0 )*inc_f;
263
264 float* g0 = buff_g + (0 )*inc_g;
265
266 float* a12p_t = a12p + (0 )*inc_ap;
267 float* a12p_b = a12p + (1 )*inc_ap;
268
269 float* v21_t = v21 + (0 )*inc_v;
270 float* v21_b = v21 + (1 )*inc_v;
271
272 float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
273
274 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
275 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
276
277 float* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
278
279 float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
280
281 float* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
282
283 float* ABL = a10t;
284 float* ZBL = z10t;
285
286 float* a2 = alpha11;
287
288 int m_ahead = m_A - i - 1;
289 int n_ahead = n_A - i - 1;
290 int m_behind = i;
291 int n_behind = i;
292
293 /*------------------------------------------------------------*/
294
295 if ( m_behind > 0 )
296 {
297 // FLA_Copy( a01_b, last_elem );
298 // FLA_Set( FLA_ONE, a01_b );
299 last_elem = *a01_b;
300 *a01_b = *buff_1;
301 }
302
303 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
304 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
307 m_ahead + 1,
308 n_behind,
309 buff_m1,
310 ABL, rs_A, cs_A,
311 y10t, cs_Y,
312 buff_1,
313 a2, rs_A );
316 m_ahead + 1,
317 n_behind,
318 buff_m1,
319 ZBL, rs_Z, cs_Z,
320 a01, rs_A,
321 buff_1,
322 a2, rs_A );
323
324 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
325 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
328 n_ahead,
329 n_behind,
330 buff_m1,
331 Y20, rs_Y, cs_Y,
332 a10t, cs_A,
333 buff_1,
334 a12t, cs_A );
337 m_behind,
338 n_ahead,
339 buff_m1,
340 A02, rs_A, cs_A,
341 z10t, cs_Z,
342 buff_1,
343 a12t, cs_A );
344
345 if ( m_behind > 0 )
346 {
347 // FLA_Copy( last_elem, a01_b );
348 *a01_b = last_elem;
349 }
350
351 // FLA_Househ2_UT( FLA_LEFT,
352 // alpha11,
353 // a21, tau11 );
354 // FLA_Copy( a21, u21p );
356 alpha11,
357 a21, rs_A,
358 tau11 );
360 m_ahead,
361 a21, rs_A,
362 u21p, inc_up );
363
364 if ( n_ahead > 0 )
365 {
366 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
367 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
369
370 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
371 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
373 n_ahead,
374 a12t, cs_A,
375 a12p, inc_ap );
377 n_ahead,
379 a12t, cs_A,
380 a12p, inc_ap );
381
382 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
383 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
386 m_ahead,
387 n_behind,
388 buff_1,
389 A20, rs_A, cs_A,
390 u21p, inc_up,
391 buff_0,
392 d0, inc_d );
395 m_ahead,
396 n_behind,
397 buff_1,
398 Z20, rs_Z, cs_Z,
399 u21p, inc_up,
400 buff_0,
401 e0, inc_e );
402
403 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
404 // FLA_Axpy( FLA_ONE, d0, t01 );
406 n_behind,
407 a10t, cs_A,
408 t01, rs_T );
410 n_behind,
411 buff_1,
412 d0, inc_d,
413 t01, rs_T );
414
415 // FLA_Set( FLA_ZERO, y21 );
416 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
417 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
419 buff_0,
420 y21, rs_Y );
423 n_ahead,
424 n_behind,
425 buff_m1,
426 Y20, rs_Y, cs_Y,
427 d0, inc_d,
428 buff_1,
429 y21, rs_Y );
432 m_behind,
433 n_ahead,
434 buff_m1,
435 A02, rs_A, cs_A,
436 e0, inc_e,
437 buff_1,
438 y21, rs_Y );
439
440 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
443 m_ahead,
444 n_ahead,
445 buff_1,
446 A22, rs_A, cs_A,
447 u21p, inc_up,
448 buff_1,
449 y21, rs_Y );
450
451 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
453 n_ahead,
455 y21, rs_Y,
456 a12p, inc_ap );
457
458 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
461 m_ahead,
462 n_ahead,
463 buff_1,
464 A22, rs_A, cs_A,
465 a12p, inc_ap,
466 buff_0,
467 w21, inc_w );
468
469 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
470 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
473 n_ahead,
474 n_behind,
475 buff_1,
476 Y20, rs_Y, cs_Y,
477 a12p, inc_ap,
478 buff_0,
479 f0, inc_f );
482 m_behind,
483 n_ahead,
484 buff_1,
485 A02, rs_A, cs_A,
486 a12p, inc_ap,
487 buff_0,
488 g0, inc_g );
489
490 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
491 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
494 m_ahead,
495 n_behind,
496 buff_m1,
497 A20, rs_A, cs_A,
498 f0, inc_f,
499 buff_1,
500 w21, inc_w );
503 m_ahead,
504 n_behind,
505 buff_m1,
506 Z20, rs_Z, cs_Z,
507 g0, inc_g,
508 buff_1,
509 w21, inc_w );
510
511 // FLA_Copy( A22_l, a22l );
512 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
513 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
515 m_ahead,
516 A22_l, rs_A,
517 a22l, inc_al );
520 m_ahead,
521 n_behind,
522 buff_m1,
523 A20, rs_A, cs_A,
524 Y20_t, cs_Y,
525 buff_1,
526 a22l, inc_al );
529 m_ahead,
530 n_behind,
531 buff_m1,
532 Z20, rs_Z, cs_Z,
533 A02_l, rs_A,
534 buff_1,
535 a22l, inc_al );
536
537 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
539 n_ahead,
540 buff_1,
541 a12t, cs_A,
542 y21, rs_Y );
543
544 // FLA_Househ2s_UT( FLA_RIGHT,
545 // a12p_t,
546 // a12p_b,
547 // alpha12, psi11_minus_alpha12, sigma11 );
549 a12p_t,
550 a12p_b, inc_ap,
551 &alpha12,
553 sigma11 );
554
555 // FLA_Copy( a12p, v21 );
556 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
557 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
558 // FLA_Conjugate( v21_b );
560 n_ahead,
561 a12p, inc_ap,
562 v21, inc_v );
565 n_ahead,
567 v21, inc_v );
568 bl1_sconjv( n_ahead - 1,
569 v21_b, inc_v );
570
571 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
572 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
575
576 // FLA_Copy( g0, s01 );
577 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
578 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
580 n_behind,
581 g0, inc_g,
582 s01, rs_S );
584 n_behind,
586 A02_l, rs_A,
587 s01, rs_S );
589 n_behind,
591 s01, rs_S );
592
593 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
594 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
595 *a12t_l = alpha12;
597 n_ahead - 1,
598 v21_b, inc_v,
599 a12t_r, cs_A );
600 }
601
602 // FLA_Copy( u21p, u21 );
604 m_ahead,
605 u21p, inc_up,
606 u21, inc_u );
607
608 if ( n_ahead > 0 )
609 {
610 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
611 // FLA_Scal( FLA_MINUS_ONE, beta );
612 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
614 n_ahead,
615 y21, rs_Y,
616 v21, inc_v,
617 &beta );
619
620 // FLA_Copy( w21, z21 );
621 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
622 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
623 // FLA_Axpy( beta, u21, z21 );
625 m_ahead,
626 w21, inc_w,
627 z21, rs_Z );
629 m_ahead,
631 a22l, inc_al,
632 z21, rs_Z );
634 m_ahead,
636 z21, rs_Z );
638 m_ahead,
639 &beta,
640 u21, inc_u,
641 z21, rs_Z );
642
643 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
644 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
646 n_ahead,
647 tau11,
648 y21, rs_Y );
650 m_ahead,
651 sigma11,
652 z21, rs_Z );
653 }
654 else // if ( n_ahead == 0 )
655 {
656 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
657 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
659 n_behind,
660 a10t, cs_A,
661 t01, rs_T );
664 m_ahead,
665 n_behind,
666 buff_1,
667 A20, rs_A, cs_A,
668 u21, inc_u,
669 buff_1,
670 t01, rs_T );
671 }
672
673 /*------------------------------------------------------------*/
674
675 }
676
677 // FLA_Obj_free( &w );
678 // FLA_Obj_free( &al );
679 // FLA_Obj_free( &ap );
680 // FLA_Obj_free( &u );
681 // FLA_Obj_free( &up );
682 // FLA_Obj_free( &v );
683 // FLA_Obj_free( &d );
684 // FLA_Obj_free( &e );
685 // FLA_Obj_free( &f );
686 // FLA_Obj_free( &g );
687 FLA_free( buff_w );
688 FLA_free( buff_al );
689 FLA_free( buff_ap );
690 FLA_free( buff_u );
691 FLA_free( buff_up );
692 FLA_free( buff_v );
693 FLA_free( buff_d );
694 FLA_free( buff_e );
695 FLA_free( buff_f );
696 FLA_free( buff_g );
697
698 return FLA_SUCCESS;
699}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition FLA_Househ2s_UT.c:555
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition bl1_conjv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_opt_var4()

FLA_Error FLA_Bidiag_UT_u_step_opt_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)
36{
37 FLA_Datatype datatype;
38 int m_A, n_A, m_TS;
39 int rs_A, cs_A;
40 int rs_Y, cs_Y;
41 int rs_Z, cs_Z;
42 int rs_T, cs_T;
43 int rs_S, cs_S;
44
45 datatype = FLA_Obj_datatype( A );
46
47 m_A = FLA_Obj_length( A );
48 n_A = FLA_Obj_width( A );
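49 m_TS = FLA_Obj_length( T );
50
51 rs_A = FLA_Obj_row_stride( A );
52 cs_A = FLA_Obj_col_stride( A );
53
54 rs_Y = FLA_Obj_row_stride( Y );
55 cs_Y = FLA_Obj_col_stride( Y );
56
57 rs_Z = FLA_Obj_row_stride( Z );
58 cs_Z = FLA_Obj_col_stride( Z );
59
60 rs_T = FLA_Obj_row_stride( T );
61 cs_T = FLA_Obj_col_stride( T );
62
63 rs_S = FLA_Obj_row_stride( S );
64 cs_S = FLA_Obj_col_stride( S );
65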
66
67 switch ( datatype )
68 {
69 case FLA_FLOAT:
70 {
71 float* buff_A = FLA_FLOAT_PTR( A );
72 float* buff_Y = FLA_FLOAT_PTR( Y );
73 float* buff_Z = FLA_FLOAT_PTR( Z );
74 float* buff_T = FLA_FLOAT_PTR( T );
75 float* buff_S = FLA_FLOAT_PTR( S );
76
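77 FLA_Bidiag_UT_u_step_ops_var4( m_A,
78 n_A,
79 m_TS,
80 buff_A, rs_A, cs_A,
81 buff_Y, rs_Y, cs_Y,
82 buff_Z, rs_Z, cs_Z,
83 buff_T, rs_T, cs_T,
84 buff_S, rs_S, cs_S );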
85
86 break;
87 }
88
89 case FLA_DOUBLE:
90 {
91 double* buff_A = FLA_DOUBLE_PTR( A );
92 double* buff_Y = FLA_DOUBLE_PTR( Y );
93 double* buff_Z = FLA_DOUBLE_PTR( Z );
94 double* buff_T = FLA_DOUBLE_PTR( T );
95 double* buff_S = FLA_DOUBLE_PTR( S );
96
97 FLA_Bidiag_UT_u_step_opd_var4( m_A,
98 n_A,
99 m_TS,
100 buff_A, rs_A, cs_A,
101 buff_Y, rs_Y, cs_Y,
102 buff_Z, rs_Z, cs_Z,
103 buff_T, rs_T, cs_T,
104 buff_S, rs_S, cs_S );
105
106 break;
107 }
108
109 case FLA_COMPLEX:
110 {
116
117 FLA_Bidiag_UT_u_step_opc_var4( m_A,
118 n_A,
119 m_TS,
120 buff_A, rs_A, cs_A,
121 buff_Y, rs_Y, cs_Y,
122 buff_Z, rs_Z, cs_Z,
123 buff_T, rs_T, cs_T,
124 buff_S, rs_S, cs_S );
125
126 break;
127 }
128
129 case FLA_DOUBLE_COMPLEX:
130 {
136
137 FLA_Bidiag_UT_u_step_opz_var4( m_A,
138 n_A,
139 m_TS,
140 buff_A, rs_A, cs_A,
141 buff_Y, rs_Y, cs_Y,
142 buff_Z, rs_Z, cs_Z,
143 buff_T, rs_T, cs_T,
144 buff_S, rs_S, cs_S );
145
146 break;
147 }
148 }
149
150 return FLA_SUCCESS;
151}
FLA_Error FLA_Bidiag_UT_u_step_ops_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:155
FLA_Error FLA_Bidiag_UT_u_step_opz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:1799
FLA_Error FLA_Bidiag_UT_u_step_opd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:703
FLA_Error FLA_Bidiag_UT_u_step_opc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var4.c:1251
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
Definition blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var4(), and FLA_Bidiag_UT_u_opt_var4().

◆ FLA_Bidiag_UT_u_step_opz_var4()

FLA_Error FLA_Bidiag_UT_u_step_opz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)
1807{
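1811
1812 dcomplex alpha12;
1813 dcomplex minus_conj_alpha12;
1814 dcomplex psi11_minus_alpha12;
1815 dcomplex minus_inv_tau11;
1816 dcomplex beta;
1817 dcomplex last_elem;
1818 int i;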
1819
1820 // b_alg = FLA_Obj_length( T );
1821 int b_alg = m_TS;
1822
1823 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1824 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1825 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1826 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1827 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1828 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1829 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1830 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1831 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1832 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1833 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1834 dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1835 dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1836 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1837 dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1838 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1839 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1840 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1841 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1842 dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1843 int inc_w = 1;
1844 int inc_al = 1;
1845 int inc_ap = 1;
1846 int inc_u = 1;
1847 int inc_up = 1;
1848 int inc_v = 1;
1849 int inc_d = 1;
1850 int inc_e = 1;
1851 int inc_f = 1;
1852 int inc_g = 1;
1853
1854 // FLA_Set( FLA_ZERO, Y );
1855 // FLA_Set( FLA_ZERO, Z );
1856 bl1_zsetm( n_A,
1857 b_alg,
1858 buff_0,
1859 buff_Y, rs_Y, cs_Y );
1860 bl1_zsetm( m_A,
1861 b_alg,
1862 buff_0,
1863 buff_Z, rs_Z, cs_Z );
1864
1865 for ( i = 0; i < b_alg; ++i )
1866 {
1867 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1868 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1869 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1870 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1871 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1872 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1873 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1874 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1875
1876 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1877 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1878 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1879
1880 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1881 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1882 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1883
1884 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1885 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1886
1887 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1888 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1889
1890 dcomplex* w21 = buff_w + (i+1)*inc_w;
1891
1892 dcomplex* a22l = buff_al + (i+1)*inc_al;
1893
1894 dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1895
1896 dcomplex* u21 = buff_u + (i+1)*inc_u;
1897
1898 dcomplex* u21p = buff_up + (i+1)*inc_up;
1899
1900 dcomplex* v21 = buff_v + (i+1)*inc_v;
1901
1902 dcomplex* d0 = buff_d + (0 )*inc_d;
1903
1904 dcomplex* e0 = buff_e + (0 )*inc_e;
1905
1906 dcomplex* f0 = buff_f + (0 )*inc_f;
1907
1908 dcomplex* g0 = buff_g + (0 )*inc_g;
1909
1910 dcomplex* a12p_t = a12p + (0 )*inc_ap;
1911 dcomplex* a12p_b = a12p + (1 )*inc_ap;
1912
1913 dcomplex* v21_t = v21 + (0 )*inc_v;
1914 dcomplex* v21_b = v21 + (1 )*inc_v;
1915
1916 dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1917
1918 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1919 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1920
1921 dcomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1922
1923 dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1924
1925 dcomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1926
1927 dcomplex* ABL = a10t;
1928 dcomplex* ZBL = z10t;
1929
1930 dcomplex* a2 = alpha11;
1931
1932 int m_ahead = m_A - i - 1;
1933 int n_ahead = n_A - i - 1;
1934 int m_behind = i;
1935 int n_behind = i;
1936
1937 /*------------------------------------------------------------*/
1938
1939 if ( m_behind > 0 )
1940 {
1941 // FLA_Copy( a01_b, last_elem );
1942 // FLA_Set( FLA_ONE, a01_b );
1943 last_elem = *a01_b;
1944 *a01_b = *buff_1;
1945 }
1946
1947 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1948 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1951 m_ahead + 1,
1952 n_behind,
1953 buff_m1,
1954 ABL, rs_A, cs_A,
1955 y10t, cs_Y,
1956 buff_1,
1957 a2, rs_A );
1960 m_ahead + 1,
1961 n_behind,
1962 buff_m1,
1963 ZBL, rs_Z, cs_Z,
1964 a01, rs_A,
1965 buff_1,
1966 a2, rs_A );
1967
1968 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1969 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1972 n_ahead,
1973 n_behind,
1974 buff_m1,
1975 Y20, rs_Y, cs_Y,
1976 a10t, cs_A,
1977 buff_1,
1978 a12t, cs_A );
1981 m_behind,
1982 n_ahead,
1983 buff_m1,
1984 A02, rs_A, cs_A,
1985 z10t, cs_Z,
1986 buff_1,
1987 a12t, cs_A );
1988
1989 if ( m_behind > 0 )
1990 {
1991 // FLA_Copy( last_elem, a01_b );
1992 *a01_b = last_elem;
1993 }
1994
1995 // FLA_Househ2_UT( FLA_LEFT,
1996 // alpha11,
1997 // a21, tau11 );
1998 // FLA_Copy( a21, u21p );
2000 alpha11,
2001 a21, rs_A,
2002 tau11 );
2004 m_ahead,
2005 a21, rs_A,
2006 u21p, inc_up );
2007
2008 if ( n_ahead > 0 )
2009 {
2010 // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
2011 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
2013
2014 // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
2015 // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
2017 n_ahead,
2018 a12t, cs_A,
2019 a12p, inc_ap );
2021 n_ahead,
2023 a12t, cs_A,
2024 a12p, inc_ap );
2025
2026 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
2027 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
2030 m_ahead,
2031 n_behind,
2032 buff_1,
2033 A20, rs_A, cs_A,
2034 u21p, inc_up,
2035 buff_0,
2036 d0, inc_d );
2039 m_ahead,
2040 n_behind,
2041 buff_1,
2042 Z20, rs_Z, cs_Z,
2043 u21p, inc_up,
2044 buff_0,
2045 e0, inc_e );
2046
2047 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2048 // FLA_Axpy( FLA_ONE, d0, t01 );
2050 n_behind,
2051 a10t, cs_A,
2052 t01, rs_T );
2054 n_behind,
2055 buff_1,
2056 d0, inc_d,
2057 t01, rs_T );
2058
2059 // FLA_Set( FLA_ZERO, y21 );
2060 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
2061 // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
2063 buff_0,
2064 y21, rs_Y );
2067 n_ahead,
2068 n_behind,
2069 buff_m1,
2070 Y20, rs_Y, cs_Y,
2071 d0, inc_d,
2072 buff_1,
2073 y21, rs_Y );
2076 m_behind,
2077 n_ahead,
2078 buff_m1,
2079 A02, rs_A, cs_A,
2080 e0, inc_e,
2081 buff_1,
2082 y21, rs_Y );
2083
2084 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
2087 m_ahead,
2088 n_ahead,
2089 buff_1,
2090 A22, rs_A, cs_A,
2091 u21p, inc_up,
2092 buff_1,
2093 y21, rs_Y );
2094
2095 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
2097 n_ahead,
2099 y21, rs_Y,
2100 a12p, inc_ap );
2101
2102 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
2105 m_ahead,
2106 n_ahead,
2107 buff_1,
2108 A22, rs_A, cs_A,
2109 a12p, inc_ap,
2110 buff_0,
2111 w21, inc_w );
2112
2113 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
2114 // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
2117 n_ahead,
2118 n_behind,
2119 buff_1,
2120 Y20, rs_Y, cs_Y,
2121 a12p, inc_ap,
2122 buff_0,
2123 f0, inc_f );
2126 m_behind,
2127 n_ahead,
2128 buff_1,
2129 A02, rs_A, cs_A,
2130 a12p, inc_ap,
2131 buff_0,
2132 g0, inc_g );
2133
2134 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
2135 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
2138 m_ahead,
2139 n_behind,
2140 buff_m1,
2141 A20, rs_A, cs_A,
2142 f0, inc_f,
2143 buff_1,
2144 w21, inc_w );
2147 m_ahead,
2148 n_behind,
2149 buff_m1,
2150 Z20, rs_Z, cs_Z,
2151 g0, inc_g,
2152 buff_1,
2153 w21, inc_w );
2154
2155 // FLA_Copy( A22_l, a22l );
2156 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
2157 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
2159 m_ahead,
2160 A22_l, rs_A,
2161 a22l, inc_al );
2164 m_ahead,
2165 n_behind,
2166 buff_m1,
2167 A20, rs_A, cs_A,
2168 Y20_t, cs_Y,
2169 buff_1,
2170 a22l, inc_al );
2173 m_ahead,
2174 n_behind,
2175 buff_m1,
2176 Z20, rs_Z, cs_Z,
2177 A02_l, rs_A,
2178 buff_1,
2179 a22l, inc_al );
2180
2181 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
2183 n_ahead,
2184 buff_1,
2185 a12t, cs_A,
2186 y21, rs_Y );
2187
2188 // FLA_Househ2s_UT( FLA_RIGHT,
2189 // a12p_t,
2190 // a12p_b,
2191 // alpha12, psi11_minus_alpha12, sigma11 );
2193 a12p_t,
2194 a12p_b, inc_ap,
2195 &alpha12,
2197 sigma11 );
2198
2199 // FLA_Copy( a12p, v21 );
2200 // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
2201 // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
2202 // FLA_Conjugate( v21_b );
2204 n_ahead,
2205 a12p, inc_ap,
2206 v21, inc_v );
2209 n_ahead,
2211 v21, inc_v );
2212 bl1_zconjv( n_ahead - 1,
2213 v21_b, inc_v );
2214
2215 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
2216 // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
2219
2220 // FLA_Copy( g0, s01 );
2221 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
2222 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
2224 n_behind,
2225 g0, inc_g,
2226 s01, rs_S );
2228 n_behind,
2230 A02_l, rs_A,
2231 s01, rs_S );
2233 n_behind,
2235 s01, rs_S );
2236
2237 // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
2238 // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
2239 *a12t_l = alpha12;
2241 n_ahead - 1,
2242 v21_b, inc_v,
2243 a12t_r, cs_A );
2244 }
2245
2246 // FLA_Copy( u21p, u21 );
2248 m_ahead,
2249 u21p, inc_up,
2250 u21, inc_u );
2251
2252 if ( n_ahead > 0 )
2253 {
2254 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
2255 // FLA_Scal( FLA_MINUS_ONE, beta );
2256 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
2258 n_ahead,
2259 y21, rs_Y,
2260 v21, inc_v,
2261 &beta );
2263
2264 // FLA_Copy( w21, z21 );
2265 // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
2266 // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
2267 // FLA_Axpy( beta, u21, z21 );
2269 m_ahead,
2270 w21, inc_w,
2271 z21, rs_Z );
2273 m_ahead,
2275 a22l, inc_al,
2276 z21, rs_Z );
2278 m_ahead,
2280 z21, rs_Z );
2282 m_ahead,
2283 &beta,
2284 u21, inc_u,
2285 z21, rs_Z );
2286
2287 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
2288 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
2290 n_ahead,
2291 tau11,
2292 y21, rs_Y );
2294 m_ahead,
2295 sigma11,
2296 z21, rs_Z );
2297 }
2298 else // if ( n_ahead == 0 )
2299 {
2300 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2301 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
2303 n_behind,
2304 a10t, cs_A,
2305 t01, rs_T );
2308 m_ahead,
2309 n_behind,
2310 buff_1,
2311 A20, rs_A, cs_A,
2312 u21, inc_u,
2313 buff_1,
2314 t01, rs_T );
2315 }
2316
2317 /*------------------------------------------------------------*/
2318
2319 }
2320
2321 // FLA_Obj_free( &w );
2322 // FLA_Obj_free( &al );
2323 // FLA_Obj_free( &ap );
2324 // FLA_Obj_free( &u );
2325 // FLA_Obj_free( &up );
2326 // FLA_Obj_free( &v );
2327 // FLA_Obj_free( &d );
2328 // FLA_Obj_free( &e );
2329 // FLA_Obj_free( &f );
2330 // FLA_Obj_free( &g );
2331 FLA_free( buff_w );
2332 FLA_free( buff_al );
2333 FLA_free( buff_ap );
2334 FLA_free( buff_u );
2335 FLA_free( buff_up );
2336 FLA_free( buff_v );
2337 FLA_free( buff_d );
2338 FLA_free( buff_e );
2339 FLA_free( buff_f );
2340 FLA_free( buff_g );
2341
2342 return FLA_SUCCESS;
2343}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition FLA_Househ2s_UT.c:610
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition bl1_conjv.c:34
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().