libflame revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var2()

FLA_Error FLA_Bidiag_UT_u_opt_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 return FLA_Bidiag_UT_u_step_opt_var2( A, TU, TV );
16}
FLA_Error FLA_Bidiag_UT_u_step_opt_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var2.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Bidiag_UT_u_step_opt_var2(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_step_opc_var2()

FLA_Error FLA_Bidiag_UT_u_step_opc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
536{
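537 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
538 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
539 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
540
541 scomplex beta;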
542 int i;
543
544 // b_alg = FLA_Obj_length( T );
545 int b_alg = m_TS;
546
547 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
548 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
549 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
550 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
551 scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
552 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
553 int inc_v = 1;
554 int inc_y = 1;
555 int inc_z = 1;
556
557 for ( i = 0; i < b_alg; ++i )
558 {
559 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
560 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
561 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
562 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
563 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
564 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
565 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
566
567 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
568 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
569
570 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
571 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
572
573 scomplex* v21 = buff_v + (i+1)*inc_v;
574
575 scomplex* y21 = buff_y + (i+1)*inc_y;
576
577 scomplex* z21 = buff_z + (i+1)*inc_z;
578
579 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
580 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
581
582 scomplex* v21_t = v21 + (0 )*inc_v;
583 scomplex* v21_b = v21 + (1 )*inc_v;
584
585 int m_ahead = m_A - i - 1;
586 int n_ahead = n_A - i - 1;
587 int m_behind = i;
588 int n_behind = i;
589
590 /*------------------------------------------------------------*/
591
592 // FLA_Househ2_UT( FLA_LEFT,
593 // alpha11,
594 // a21, tau11 );
595 FLA_Househ2_UT_l_opc( m_ahead,
596 alpha11,
597 a21, rs_A,
598 tau11 );
599
600 if ( n_ahead > 0 )
601 {
602 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
603 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
604 bl1_ccopyv( BLIS1_CONJUGATE,
605 n_ahead,
606 a12t, cs_A,
607 y21, inc_y );
608 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
609 BLIS1_NO_CONJUGATE,
610 m_ahead,
611 n_ahead,
612 buff_1,
613 A22, rs_A, cs_A,
614 a21, rs_A,
615 buff_1,
616 y21, inc_y );
617
618 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
619 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
620 n_ahead,
621 tau11,
622 y21, inc_y );
623
624 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
625 bl1_caxpyv( BLIS1_CONJUGATE,
626 n_ahead,
627 buff_m1,
628 y21, inc_y,
629 a12t, cs_A );
630
631 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
632 FLA_Househ2_UT_r_opc( n_ahead - 1,
633 a12t_l,
634 a12t_r, cs_A,
635 sigma11 );
636
637 // FLA_Set( FLA_ONE, v21_t );
638 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
639 *v21_t = *buff_1;
640 bl1_ccopyv( BLIS1_NO_CONJUGATE,
641 n_ahead - 1,
642 a12t_r, cs_A,
643 v21_b, inc_y );
644
645 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
646 // FLA_Scal( FLA_MINUS_ONE, beta );
647 bl1_cdot( BLIS1_CONJUGATE,
648 n_ahead,
649 y21, inc_y,
650 v21, inc_v,
651 &beta );
652 bl1_cneg1( &beta );
653
654 // FLA_Copy( a21, z21 );
655 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
656 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
657 bl1_ccopyv( BLIS1_NO_CONJUGATE,
658 m_ahead,
659 a21, rs_A,
660 z21, inc_z );
661 bl1_cgemv( BLIS1_NO_TRANSPOSE,
662 BLIS1_NO_CONJUGATE,
663 m_ahead,
664 n_ahead,
665 buff_1,
666 A22, rs_A, cs_A,
667 v21, inc_v,
668 &beta,
669 z21, inc_z );
670 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
671 m_ahead,
672 sigma11,
673 z21, inc_z );
674
675 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
676 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
677 bl1_cger( BLIS1_NO_CONJUGATE,
678 BLIS1_CONJUGATE,
679 m_ahead,
680 n_ahead,
681 buff_m1,
682 a21, rs_A,
683 y21, inc_y,
684 A22, rs_A, cs_A );
685 bl1_cger( BLIS1_NO_CONJUGATE,
686 BLIS1_CONJUGATE,
687 m_ahead,
688 n_ahead,
689 buff_m1,
690 z21, inc_z,
691 v21, inc_v,
692 A22, rs_A, cs_A );
693
694 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
695 bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
696 BLIS1_NO_CONJUGATE,
697 m_behind,
698 n_ahead,
699 buff_1,
700 A02, rs_A, cs_A,
701 v21, inc_v,
702 buff_0,
703 s01, rs_S );
704 }
705
706 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
707 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
708 bl1_ccopyv( BLIS1_CONJUGATE,
709 n_behind,
710 a10t, cs_A,
711 t01, rs_T );
712 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
713 BLIS1_NO_CONJUGATE,
714 m_ahead,
715 n_behind,
716 buff_1,
717 A20, rs_A, cs_A,
718 a21, rs_A,
719 buff_1,
720 t01, rs_T );
721
722 /*------------------------------------------------------------*/
723
724 }
725
726 // FLA_Obj_free( &v );
727 // FLA_Obj_free( &y );
728 // FLA_Obj_free( &z );
729 FLA_free( buff_v );
730 FLA_free( buff_y );
731 FLA_free( buff_z );
732
733 return FLA_SUCCESS;
734}
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:677
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_opd_var2()

FLA_Error FLA_Bidiag_UT_u_step_opd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
328{
329 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
330 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
331 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
332
333 double beta;
334 int i;
335
336 // b_alg = FLA_Obj_length( T );
337 int b_alg = m_TS;
338
339 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
340 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
341 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
342 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
343 double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
344 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
345 int inc_v = 1;
346 int inc_y = 1;
347 int inc_z = 1;
348
349 for ( i = 0; i < b_alg; ++i )
350 {
351 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
352 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
353 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
354 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
355 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
356 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
357 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
358
359 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
360 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
361
362 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
363 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
364
365 double* v21 = buff_v + (i+1)*inc_v;
366
367 double* y21 = buff_y + (i+1)*inc_y;
368
369 double* z21 = buff_z + (i+1)*inc_z;
370
371 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
372 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
373
374 double* v21_t = v21 + (0 )*inc_v;
375 double* v21_b = v21 + (1 )*inc_v;
376
377 int m_ahead = m_A - i - 1;
378 int n_ahead = n_A - i - 1;
379 int m_behind = i;
380 int n_behind = i;
381
382 /*------------------------------------------------------------*/
383
384 // FLA_Househ2_UT( FLA_LEFT,
385 // alpha11,
386 // a21, tau11 );
387 FLA_Househ2_UT_l_opd( m_ahead,
388 alpha11,
389 a21, rs_A,
390 tau11 );
391
392 if ( n_ahead > 0 )
393 {
394 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
395 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
396 bl1_dcopyv( BLIS1_CONJUGATE,
397 n_ahead,
398 a12t, cs_A,
399 y21, inc_y );
400 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
401 BLIS1_NO_CONJUGATE,
402 m_ahead,
403 n_ahead,
404 buff_1,
405 A22, rs_A, cs_A,
406 a21, rs_A,
407 buff_1,
408 y21, inc_y );
409
410 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
411 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
412 n_ahead,
413 tau11,
414 y21, inc_y );
415
416 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
417 bl1_daxpyv( BLIS1_CONJUGATE,
418 n_ahead,
419 buff_m1,
420 y21, inc_y,
421 a12t, cs_A );
422
423 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
424 FLA_Househ2_UT_r_opd( n_ahead - 1,
425 a12t_l,
426 a12t_r, cs_A,
427 sigma11 );
428
429 // FLA_Set( FLA_ONE, v21_t );
430 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
431 *v21_t = *buff_1;
432 bl1_dcopyv( BLIS1_NO_CONJUGATE,
433 n_ahead - 1,
434 a12t_r, cs_A,
435 v21_b, inc_y );
436
437 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
438 // FLA_Scal( FLA_MINUS_ONE, beta );
439 bl1_ddot( BLIS1_CONJUGATE,
440 n_ahead,
441 y21, inc_y,
442 v21, inc_v,
443 &beta );
444 bl1_dneg1( &beta );
445
446 // FLA_Copy( a21, z21 );
447 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
448 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
449 bl1_dcopyv( BLIS1_NO_CONJUGATE,
450 m_ahead,
451 a21, rs_A,
452 z21, inc_z );
453 bl1_dgemv( BLIS1_NO_TRANSPOSE,
454 BLIS1_NO_CONJUGATE,
455 m_ahead,
456 n_ahead,
457 buff_1,
458 A22, rs_A, cs_A,
459 v21, inc_v,
460 &beta,
461 z21, inc_z );
462 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
463 m_ahead,
464 sigma11,
465 z21, inc_z );
466
467 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
468 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
469 bl1_dger( BLIS1_NO_CONJUGATE,
470 BLIS1_CONJUGATE,
471 m_ahead,
472 n_ahead,
473 buff_m1,
474 a21, rs_A,
475 y21, inc_y,
476 A22, rs_A, cs_A );
477 bl1_dger( BLIS1_NO_CONJUGATE,
478 BLIS1_CONJUGATE,
479 m_ahead,
480 n_ahead,
481 buff_m1,
482 z21, inc_z,
483 v21, inc_v,
484 A22, rs_A, cs_A );
485
486 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
487 bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
488 BLIS1_NO_CONJUGATE,
489 m_behind,
490 n_ahead,
491 buff_1,
492 A02, rs_A, cs_A,
493 v21, inc_v,
494 buff_0,
495 s01, rs_S );
496 }
497
498 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
499 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
500 bl1_dcopyv( BLIS1_CONJUGATE,
501 n_behind,
502 a10t, cs_A,
503 t01, rs_T );
504 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
505 BLIS1_NO_CONJUGATE,
506 m_ahead,
507 n_behind,
508 buff_1,
509 A20, rs_A, cs_A,
510 a21, rs_A,
511 buff_1,
512 t01, rs_T );
513
514 /*------------------------------------------------------------*/
515
516 }
517
518 // FLA_Obj_free( &v );
519 // FLA_Obj_free( &y );
520 // FLA_Obj_free( &z );
521 FLA_free( buff_v );
522 FLA_free( buff_y );
523 FLA_free( buff_z );
524
525 return FLA_SUCCESS;
526}
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:664
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_ops_var2()

FLA_Error FLA_Bidiag_UT_u_step_ops_var2 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T,
float * buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124
125 float beta;
126 int i;
127
128 // b_alg = FLA_Obj_length( T );
129 int b_alg = m_TS;
130
131 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135 float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137 int inc_v = 1;
138 int inc_y = 1;
139 int inc_z = 1;
140
141 for ( i = 0; i < b_alg; ++i )
142 {
143 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150
151 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153
154 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156
157 float* v21 = buff_v + (i+1)*inc_v;
158
159 float* y21 = buff_y + (i+1)*inc_y;
160
161 float* z21 = buff_z + (i+1)*inc_z;
162
163 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165
166 float* v21_t = v21 + (0 )*inc_v;
167 float* v21_b = v21 + (1 )*inc_v;
168
169 int m_ahead = m_A - i - 1;
170 int n_ahead = n_A - i - 1;
171 int m_behind = i;
172 int n_behind = i;
173
174 /*------------------------------------------------------------*/
175
176 // FLA_Househ2_UT( FLA_LEFT,
177 // alpha11,
178 // a21, tau11 );
179 FLA_Househ2_UT_l_ops( m_ahead,
180 alpha11,
181 a21, rs_A,
182 tau11 );
183
184 if ( n_ahead > 0 )
185 {
186 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
187 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
188 bl1_scopyv( BLIS1_CONJUGATE,
189 n_ahead,
190 a12t, cs_A,
191 y21, inc_y );
192 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
193 BLIS1_NO_CONJUGATE,
194 m_ahead,
195 n_ahead,
196 buff_1,
197 A22, rs_A, cs_A,
198 a21, rs_A,
199 buff_1,
200 y21, inc_y );
201
202 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
203 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
204 n_ahead,
205 tau11,
206 y21, inc_y );
207
208 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
209 bl1_saxpyv( BLIS1_CONJUGATE,
210 n_ahead,
211 buff_m1,
212 y21, inc_y,
213 a12t, cs_A );
214
215 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
216 FLA_Househ2_UT_r_ops( n_ahead - 1,
217 a12t_l,
218 a12t_r, cs_A,
219 sigma11 );
220
221 // FLA_Set( FLA_ONE, v21_t );
222 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223 *v21_t = *buff_1;
224 bl1_scopyv( BLIS1_NO_CONJUGATE,
225 n_ahead - 1,
226 a12t_r, cs_A,
227 v21_b, inc_y );
228
229 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
230 // FLA_Scal( FLA_MINUS_ONE, beta );
231 bl1_sdot( BLIS1_CONJUGATE,
232 n_ahead,
233 y21, inc_y,
234 v21, inc_v,
235 &beta );
236 bl1_sneg1( &beta );
237
238 // FLA_Copy( a21, z21 );
239 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
241 bl1_scopyv( BLIS1_NO_CONJUGATE,
242 m_ahead,
243 a21, rs_A,
244 z21, inc_z );
245 bl1_sgemv( BLIS1_NO_TRANSPOSE,
246 BLIS1_NO_CONJUGATE,
247 m_ahead,
248 n_ahead,
249 buff_1,
250 A22, rs_A, cs_A,
251 v21, inc_v,
252 &beta,
253 z21, inc_z );
254 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
255 m_ahead,
256 sigma11,
257 z21, inc_z );
258
259 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
260 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
261 bl1_sger( BLIS1_NO_CONJUGATE,
262 BLIS1_CONJUGATE,
263 m_ahead,
264 n_ahead,
265 buff_m1,
266 a21, rs_A,
267 y21, inc_y,
268 A22, rs_A, cs_A );
269 bl1_sger( BLIS1_NO_CONJUGATE,
270 BLIS1_CONJUGATE,
271 m_ahead,
272 n_ahead,
273 buff_m1,
274 z21, inc_z,
275 v21, inc_v,
276 A22, rs_A, cs_A );
277
278 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
279 bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
280 BLIS1_NO_CONJUGATE,
281 m_behind,
282 n_ahead,
283 buff_1,
284 A02, rs_A, cs_A,
285 v21, inc_v,
286 buff_0,
287 s01, rs_S );
288 }
289
290 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
291 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
292 bl1_scopyv( BLIS1_CONJUGATE,
293 n_behind,
294 a10t, cs_A,
295 t01, rs_T );
296 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
297 BLIS1_NO_CONJUGATE,
298 m_ahead,
299 n_behind,
300 buff_1,
301 A20, rs_A, cs_A,
302 a21, rs_A,
303 buff_1,
304 t01, rs_T );
305
306 /*------------------------------------------------------------*/
307
308 }
309
310 // FLA_Obj_free( &v );
311 // FLA_Obj_free( &y );
312 // FLA_Obj_free( &z );
313 FLA_free( buff_v );
314 FLA_free( buff_y );
315 FLA_free( buff_z );
316
317 return FLA_SUCCESS;
318}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:651
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_opt_var2()

FLA_Error FLA_Bidiag_UT_u_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
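30 m_TS = FLA_Obj_length( T );
31
32 rs_A = FLA_Obj_row_stride( A );
33 cs_A = FLA_Obj_col_stride( A );
34
35 rs_T = FLA_Obj_row_stride( T );
36 cs_T = FLA_Obj_col_stride( T );
37
38 rs_S = FLA_Obj_row_stride( S );
39 cs_S = FLA_Obj_col_stride( S );
40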
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
50 FLA_Bidiag_UT_u_step_ops_var2( m_A,
51 n_A,
52 m_TS,
53 buff_A, rs_A, cs_A,
54 buff_T, rs_T, cs_T,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
66 FLA_Bidiag_UT_u_step_opd_var2( m_A,
67 n_A,
68 m_TS,
69 buff_A, rs_A, cs_A,
70 buff_T, rs_T, cs_T,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
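78 scomplex* buff_A = FLA_COMPLEX_PTR( A );
79 scomplex* buff_T = FLA_COMPLEX_PTR( T );
80 scomplex* buff_S = FLA_COMPLEX_PTR( S );
81
82 FLA_Bidiag_UT_u_step_opc_var2( m_A,
83 n_A,
84 m_TS,
85 buff_A, rs_A, cs_A,
86 buff_T, rs_T, cs_T,
87 buff_S, rs_S, cs_S );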
88
89 break;
90 }
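91
92 case FLA_DOUBLE_COMPLEX:
93 {
94 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96 dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97
98 FLA_Bidiag_UT_u_step_opz_var2( m_A,
99 n_A,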
100 m_TS,
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_opz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:738
FLA_Error FLA_Bidiag_UT_u_step_opc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:530
FLA_Error FLA_Bidiag_UT_u_step_opd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:322
FLA_Error FLA_Bidiag_UT_u_step_ops_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var2.c:114
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var2(), and FLA_Bidiag_UT_u_opt_var2().

◆ FLA_Bidiag_UT_u_step_opz_var2()

FLA_Error FLA_Bidiag_UT_u_step_opz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
744{
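745 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
746 dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
747 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
748
749 dcomplex beta;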
750 int i;
751
752 // b_alg = FLA_Obj_length( T );
753 int b_alg = m_TS;
754
755 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
756 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
757 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
758 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
759 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
760 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
761 int inc_v = 1;
762 int inc_y = 1;
763 int inc_z = 1;
764
765 for ( i = 0; i < b_alg; ++i )
766 {
767 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
768 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
769 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
770 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
771 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
772 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
773 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
774
775 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
776 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
777
778 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
779 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
780
781 dcomplex* v21 = buff_v + (i+1)*inc_v;
782
783 dcomplex* y21 = buff_y + (i+1)*inc_y;
784
785 dcomplex* z21 = buff_z + (i+1)*inc_z;
786
787 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
788 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
789
790 dcomplex* v21_t = v21 + (0 )*inc_v;
791 dcomplex* v21_b = v21 + (1 )*inc_v;
792
793 int m_ahead = m_A - i - 1;
794 int n_ahead = n_A - i - 1;
795 int m_behind = i;
796 int n_behind = i;
797
798 /*------------------------------------------------------------*/
799
800 // FLA_Househ2_UT( FLA_LEFT,
801 // alpha11,
802 // a21, tau11 );
803 FLA_Househ2_UT_l_opz( m_ahead,
804 alpha11,
805 a21, rs_A,
806 tau11 );
807
808 if ( n_ahead > 0 )
809 {
810 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
811 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
812 bl1_zcopyv( BLIS1_CONJUGATE,
813 n_ahead,
814 a12t, cs_A,
815 y21, inc_y );
816 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
817 BLIS1_NO_CONJUGATE,
818 m_ahead,
819 n_ahead,
820 buff_1,
821 A22, rs_A, cs_A,
822 a21, rs_A,
823 buff_1,
824 y21, inc_y );
825
826 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
827 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
828 n_ahead,
829 tau11,
830 y21, inc_y );
831
832 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
833 bl1_zaxpyv( BLIS1_CONJUGATE,
834 n_ahead,
835 buff_m1,
836 y21, inc_y,
837 a12t, cs_A );
838
839 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
840 FLA_Househ2_UT_r_opz( n_ahead - 1,
841 a12t_l,
842 a12t_r, cs_A,
843 sigma11 );
844
845 // FLA_Set( FLA_ONE, v21_t );
846 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
847 *v21_t = *buff_1;
848 bl1_zcopyv( BLIS1_NO_CONJUGATE,
849 n_ahead - 1,
850 a12t_r, cs_A,
851 v21_b, inc_y );
852
853 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
854 // FLA_Scal( FLA_MINUS_ONE, beta );
855 bl1_zdot( BLIS1_CONJUGATE,
856 n_ahead,
857 y21, inc_y,
858 v21, inc_v,
859 &beta );
860 bl1_zneg1( &beta );
861
862 // FLA_Copy( a21, z21 );
863 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
864 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
865 bl1_zcopyv( BLIS1_NO_CONJUGATE,
866 m_ahead,
867 a21, rs_A,
868 z21, inc_z );
869 bl1_zgemv( BLIS1_NO_TRANSPOSE,
870 BLIS1_NO_CONJUGATE,
871 m_ahead,
872 n_ahead,
873 buff_1,
874 A22, rs_A, cs_A,
875 v21, inc_v,
876 &beta,
877 z21, inc_z );
878 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
879 m_ahead,
880 sigma11,
881 z21, inc_z );
882
883 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
884 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
885 bl1_zger( BLIS1_NO_CONJUGATE,
886 BLIS1_CONJUGATE,
887 m_ahead,
888 n_ahead,
889 buff_m1,
890 a21, rs_A,
891 y21, inc_y,
892 A22, rs_A, cs_A );
893 bl1_zger( BLIS1_NO_CONJUGATE,
894 BLIS1_CONJUGATE,
895 m_ahead,
896 n_ahead,
897 buff_m1,
898 z21, inc_z,
899 v21, inc_v,
900 A22, rs_A, cs_A );
901
902 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
903 bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
904 BLIS1_NO_CONJUGATE,
905 m_behind,
906 n_ahead,
907 buff_1,
908 A02, rs_A, cs_A,
909 v21, inc_v,
910 buff_0,
911 s01, rs_S );
912 }
913
914 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
915 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
916 bl1_zcopyv( BLIS1_CONJUGATE,
917 n_behind,
918 a10t, cs_A,
919 t01, rs_T );
920 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
921 BLIS1_NO_CONJUGATE,
922 m_ahead,
923 n_behind,
924 buff_1,
925 A20, rs_A, cs_A,
926 a21, rs_A,
927 buff_1,
928 t01, rs_T );
929
930 /*------------------------------------------------------------*/
931
932 }
933
934 // FLA_Obj_free( &v );
935 // FLA_Obj_free( &y );
936 // FLA_Obj_free( &z );
937 FLA_free( buff_v );
938 FLA_free( buff_y );
939 FLA_free( buff_z );
940
941 return FLA_SUCCESS;
942}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:693
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().