libflame revision_anchor
Functions
FLA_Tridiag_UT_l_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_Tridiag_UT_l_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Tridiag_UT_l_opt_var3()

FLA_Error FLA_Tridiag_UT_l_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
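14{
15 FLA_Error r_val;
16 FLA_Obj Z;
17
18 FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
19
20 r_val = FLA_Tridiag_UT_l_step_opt_var3( A, Z, T );
21
22 FLA_Obj_free( &Z );
23
24 return r_val;
25}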
FLA_Error FLA_Tridiag_UT_l_step_opt_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var3.c:27
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition FLA_Obj.c:286
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
int FLA_Error
Definition FLA_type_defs.h:47
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Obj_create_conf_to(), FLA_Obj_free(), FLA_Tridiag_UT_l_step_opt_var3(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_step_opc_var3()

FLA_Error FLA_Tridiag_UT_l_step_opc_var3 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
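569{
570 scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
571 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
572 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
573 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
574
575 scomplex first_elem, last_elem;
576 scomplex beta;
577 scomplex inv_tau11;
578 scomplex minus_inv_tau11;
579 int i;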
580
581 // b_alg = FLA_Obj_length( T );
582 int b_alg = m_T;
583
584 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
585 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
586 scomplex* buff_d = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
587 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
588 int inc_d = 1;
589 int inc_f = 1;
590
591 // FLA_Set( FLA_ZERO, Z );
592 bl1_csetm( m_A,
593 b_alg,
594 buff_0,
595 buff_Z, rs_Z, cs_Z );
596
597 for ( i = 0; i < b_alg; ++i )
598 {
599 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
600 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
601 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
602 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
603 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
604
605 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
606 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
607 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
608
609 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
610 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
611
612 scomplex* d01 = buff_d + (0 )*inc_d;
613
614 scomplex* f01 = buff_f + (0 )*inc_f;
615
616 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
617
618 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
619 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
620
621 scomplex* ABL = a10t;
622 scomplex* ZBL = z10t;
623
624 scomplex* a2 = alpha11;
625
626 int m_ahead = m_A - i - 1;
627 int m_behind = i;
628 int n_behind = i;
629
630 /*------------------------------------------------------------*/
631
632 if ( m_behind > 0 )
633 {
634 // FLA_Copy( a10t_r, last_elem );
635 // FLA_Set( FLA_ONE, a10t_r );
636 last_elem = *a10t_r;
637 *a10t_r = *buff_1;
638 }
639
640 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
641 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
644 m_ahead + 1,
645 n_behind,
646 buff_m1,
647 ABL, rs_A, cs_A,
648 z10t, cs_Z,
649 buff_1,
650 a2, rs_A );
653 m_ahead + 1,
654 n_behind,
655 buff_m1,
656 ZBL, rs_Z, cs_Z,
657 a10t, cs_A,
658 buff_1,
659 a2, rs_A );
660
661 if ( m_behind > 0 )
662 {
663 // FLA_Copy( last_elem, a10t_r );
664 *a10t_r = last_elem;
665 }
666
667 if ( m_ahead > 0 )
668 {
669 // FLA_Househ2_UT( FLA_LEFT,
670 // a21_t,
671 // a21_b, tau11 );
673 a21_t,
674 a21_b, rs_A,
675 tau11 );
676
677 // FLA_Set( FLA_ONE, inv_tau11 );
678 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
679 // FLA_Copy( inv_tau11, minus_inv_tau11 );
680 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
683
684 // FLA_Copy( a21_t, first_elem );
685 // FLA_Set( FLA_ONE, a21_t );
686 first_elem = *a21_t;
687 *a21_t = *buff_1;
688
689 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
692 m_ahead,
693 buff_1,
694 A22, rs_A, cs_A,
695 a21, rs_A,
696 buff_0,
697 z21, rs_Z );
698
699 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
700 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
703 m_ahead,
704 n_behind,
705 buff_1,
706 A20, rs_A, cs_A,
707 a21, rs_A,
708 buff_0,
709 d01, inc_d );
712 m_ahead,
713 n_behind,
714 buff_1,
715 Z20, rs_Z, cs_Z,
716 a21, rs_A,
717 buff_0,
718 f01, inc_f );
719
720 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
721 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
724 m_ahead,
725 n_behind,
726 buff_m1,
727 A20, rs_A, cs_A,
728 f01, inc_f,
729 buff_1,
730 z21, rs_Z );
733 m_ahead,
734 n_behind,
735 buff_m1,
736 Z20, rs_Z, cs_Z,
737 d01, inc_d,
738 buff_1,
739 z21, rs_Z );
740
741 // FLA_Copy( d01, t01 );
743 n_behind,
744 d01, inc_d,
745 t01, rs_T );
746
747 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
748 // FLA_Inv_scal( FLA_TWO, beta );
750 m_ahead,
751 a21, rs_A,
752 z21, rs_Z,
753 &beta );
755
756 // FLA_Scal( minus_inv_tau11, beta );
757 // FLA_Axpy( beta, a21, z21 );
758 // FLA_Scal( inv_tau11, z21 );
761 m_ahead,
762 &beta,
763 a21, rs_A,
764 z21, rs_Z );
766 m_ahead,
767 &inv_tau11,
768 z21, rs_Z );
769
770 // FLA_Copy( first_elem, a21_t );
771 *a21_t = first_elem;
772 }
773
774 /*------------------------------------------------------------*/
775
776 }
777
778 // FLA_Obj_free( &d );
779 // FLA_Obj_free( &f );
780 FLA_free( buff_d );
781 FLA_free( buff_f );
782
783 return FLA_SUCCESS;
784}
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_hemv.c:35
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_LOWER_TRIANGULAR
Definition blis_type_defs.h:62
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_opd_var3()

FLA_Error FLA_Tridiag_UT_l_step_opd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
346{
347 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
348 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
349 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
350 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
351
352 double first_elem, last_elem;
353 double beta;
354 double inv_tau11;
355 double minus_inv_tau11;
356 int i;
357
358 // b_alg = FLA_Obj_length( T );
359 int b_alg = m_T;
360
361 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
362 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
363 double* buff_d = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
364 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
365 int inc_d = 1;
366 int inc_f = 1;
367
368 // FLA_Set( FLA_ZERO, Z );
369 bl1_dsetm( m_A,
370 b_alg,
371 buff_0,
372 buff_Z, rs_Z, cs_Z );
373
374 for ( i = 0; i < b_alg; ++i )
375 {
376 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
377 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
378 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
379 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
380 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
381
382 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
383 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
384 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
385
386 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
387 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
388
389 double* d01 = buff_d + (0 )*inc_d;
390
391 double* f01 = buff_f + (0 )*inc_f;
392
393 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
394
395 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
396 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
397
398 double* ABL = a10t;
399 double* ZBL = z10t;
400
401 double* a2 = alpha11;
402
403 int m_ahead = m_A - i - 1;
404 int m_behind = i;
405 int n_behind = i;
406
407 /*------------------------------------------------------------*/
408
409 if ( m_behind > 0 )
410 {
411 // FLA_Copy( a10t_r, last_elem );
412 // FLA_Set( FLA_ONE, a10t_r );
413 last_elem = *a10t_r;
414 *a10t_r = *buff_1;
415 }
416
417 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
418 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
421 m_ahead + 1,
422 n_behind,
423 buff_m1,
424 ABL, rs_A, cs_A,
425 z10t, cs_Z,
426 buff_1,
427 a2, rs_A );
430 m_ahead + 1,
431 n_behind,
432 buff_m1,
433 ZBL, rs_Z, cs_Z,
434 a10t, cs_A,
435 buff_1,
436 a2, rs_A );
437
438 if ( m_behind > 0 )
439 {
440 // FLA_Copy( last_elem, a10t_r );
441 *a10t_r = last_elem;
442 }
443
444 if ( m_ahead > 0 )
445 {
446 // FLA_Househ2_UT( FLA_LEFT,
447 // a21_t,
448 // a21_b, tau11 );
450 a21_t,
451 a21_b, rs_A,
452 tau11 );
453
454 // FLA_Set( FLA_ONE, inv_tau11 );
455 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
456 // FLA_Copy( inv_tau11, minus_inv_tau11 );
457 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
460
461 // FLA_Copy( a21_t, first_elem );
462 // FLA_Set( FLA_ONE, a21_t );
463 first_elem = *a21_t;
464 *a21_t = *buff_1;
465
466 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
468 m_ahead,
469 buff_1,
470 A22, rs_A, cs_A,
471 a21, rs_A,
472 buff_0,
473 z21, rs_Z );
474
475 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
476 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
479 m_ahead,
480 n_behind,
481 buff_1,
482 A20, rs_A, cs_A,
483 a21, rs_A,
484 buff_0,
485 d01, inc_d );
488 m_ahead,
489 n_behind,
490 buff_1,
491 Z20, rs_Z, cs_Z,
492 a21, rs_A,
493 buff_0,
494 f01, inc_f );
495
496 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
497 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
500 m_ahead,
501 n_behind,
502 buff_m1,
503 A20, rs_A, cs_A,
504 f01, inc_f,
505 buff_1,
506 z21, rs_Z );
509 m_ahead,
510 n_behind,
511 buff_m1,
512 Z20, rs_Z, cs_Z,
513 d01, inc_d,
514 buff_1,
515 z21, rs_Z );
516
517 // FLA_Copy( d01, t01 );
519 n_behind,
520 d01, inc_d,
521 t01, rs_T );
522
523 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
524 // FLA_Inv_scal( FLA_TWO, beta );
526 m_ahead,
527 a21, rs_A,
528 z21, rs_Z,
529 &beta );
531
532 // FLA_Scal( minus_inv_tau11, beta );
533 // FLA_Axpy( beta, a21, z21 );
534 // FLA_Scal( inv_tau11, z21 );
537 m_ahead,
538 &beta,
539 a21, rs_A,
540 z21, rs_Z );
542 m_ahead,
543 &inv_tau11,
544 z21, rs_Z );
545
546 // FLA_Copy( first_elem, a21_t );
547 *a21_t = first_elem;
548 }
549
550 /*------------------------------------------------------------*/
551
552 }
553
554 // FLA_Obj_free( &d );
555 // FLA_Obj_free( &f );
556 FLA_free( buff_d );
557 FLA_free( buff_f );
558
559 return FLA_SUCCESS;
560}
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_symv.c:56
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsetm(), bl1_dsymv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_ops_var3()

FLA_Error FLA_Tridiag_UT_l_step_ops_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
123{
124 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
125 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
126 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
127 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
128
129 float first_elem, last_elem;
130 float beta;
131 float inv_tau11;
132 float minus_inv_tau11;
133 int i;
134
135 // b_alg = FLA_Obj_length( T );
136 int b_alg = m_T;
137
138 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
139 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
140 float* buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
141 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
142 int inc_d = 1;
143 int inc_f = 1;
144
145 // FLA_Set( FLA_ZERO, Z );
146 bl1_ssetm( m_A,
147 b_alg,
148 buff_0,
149 buff_Z, rs_Z, cs_Z );
150
151 for ( i = 0; i < b_alg; ++i )
152 {
153 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
154 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
155 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
156 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
157 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
158
159 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
160 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
161 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
162
163 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
164 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
165
166 float* d01 = buff_d + (0 )*inc_d;
167
168 float* f01 = buff_f + (0 )*inc_f;
169
170 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
171
172 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
173 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
174
175 float* ABL = a10t;
176 float* ZBL = z10t;
177
178 float* a2 = alpha11;
179
180 int m_ahead = m_A - i - 1;
181 int m_behind = i;
182 int n_behind = i;
183
184 /*------------------------------------------------------------*/
185
186 if ( m_behind > 0 )
187 {
188 // FLA_Copy( a10t_r, last_elem );
189 // FLA_Set( FLA_ONE, a10t_r );
190 last_elem = *a10t_r;
191 *a10t_r = *buff_1;
192 }
193
194 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
195 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
198 m_ahead + 1,
199 n_behind,
200 buff_m1,
201 ABL, rs_A, cs_A,
202 z10t, cs_Z,
203 buff_1,
204 a2, rs_A );
207 m_ahead + 1,
208 n_behind,
209 buff_m1,
210 ZBL, rs_Z, cs_Z,
211 a10t, cs_A,
212 buff_1,
213 a2, rs_A );
214
215 if ( m_behind > 0 )
216 {
217 // FLA_Copy( last_elem, a10t_r );
218 *a10t_r = last_elem;
219 }
220
221 if ( m_ahead > 0 )
222 {
223 // FLA_Househ2_UT( FLA_LEFT,
224 // a21_t,
225 // a21_b, tau11 );
227 a21_t,
228 a21_b, rs_A,
229 tau11 );
230
231 // FLA_Set( FLA_ONE, inv_tau11 );
232 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
233 // FLA_Copy( inv_tau11, minus_inv_tau11 );
234 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
237
238 // FLA_Copy( a21_t, first_elem );
239 // FLA_Set( FLA_ONE, a21_t );
240 first_elem = *a21_t;
241 *a21_t = *buff_1;
242
243 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
245 m_ahead,
246 buff_1,
247 A22, rs_A, cs_A,
248 a21, rs_A,
249 buff_0,
250 z21, rs_Z );
251
252 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
253 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
256 m_ahead,
257 n_behind,
258 buff_1,
259 A20, rs_A, cs_A,
260 a21, rs_A,
261 buff_0,
262 d01, inc_d );
265 m_ahead,
266 n_behind,
267 buff_1,
268 Z20, rs_Z, cs_Z,
269 a21, rs_A,
270 buff_0,
271 f01, inc_f );
272
273 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
274 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
277 m_ahead,
278 n_behind,
279 buff_m1,
280 A20, rs_A, cs_A,
281 f01, inc_f,
282 buff_1,
283 z21, rs_Z );
286 m_ahead,
287 n_behind,
288 buff_m1,
289 Z20, rs_Z, cs_Z,
290 d01, inc_d,
291 buff_1,
292 z21, rs_Z );
293
294 // FLA_Copy( d01, t01 );
296 n_behind,
297 d01, inc_d,
298 t01, rs_T );
299
300 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
301 // FLA_Inv_scal( FLA_TWO, beta );
303 m_ahead,
304 a21, rs_A,
305 z21, rs_Z,
306 &beta );
308
309 // FLA_Scal( minus_inv_tau11, beta );
310 // FLA_Axpy( beta, a21, z21 );
311 // FLA_Scal( inv_tau11, z21 );
314 m_ahead,
315 &beta,
316 a21, rs_A,
317 z21, rs_Z );
319 m_ahead,
320 &inv_tau11,
321 z21, rs_Z );
322
323 // FLA_Copy( first_elem, a21_t );
324 *a21_t = first_elem;
325 }
326
327 /*------------------------------------------------------------*/
328
329 }
330
331 // FLA_Obj_free( &d );
332 // FLA_Obj_free( &f );
333 FLA_free( buff_d );
334 FLA_free( buff_f );
335
336 return FLA_SUCCESS;
337}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_symv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssetm(), bl1_ssymv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_opt_var3()

FLA_Error FLA_Tridiag_UT_l_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
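28{
29 FLA_Datatype datatype;
30 int m_A, m_T;
31 int rs_A, cs_A;
32 int rs_Z, cs_Z;
33 int rs_T, cs_T;
34
35 datatype = FLA_Obj_datatype( A );
36
37 m_A = FLA_Obj_length( A );
38 m_T = FLA_Obj_length( T );
39
40 rs_A = FLA_Obj_row_stride( A );
41 cs_A = FLA_Obj_col_stride( A );
42
43 rs_Z = FLA_Obj_row_stride( Z );
44 cs_Z = FLA_Obj_col_stride( Z );
45
46 rs_T = FLA_Obj_row_stride( T );
47 cs_T = FLA_Obj_col_stride( T );
48
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
54 float* buff_A = FLA_FLOAT_PTR( A );
55 float* buff_Z = FLA_FLOAT_PTR( Z );
56 float* buff_T = FLA_FLOAT_PTR( T );
57
58 FLA_Tridiag_UT_l_step_ops_var3( m_A,
59 m_T,
60 buff_A, rs_A, cs_A,
61 buff_Z, rs_Z, cs_Z,
62 buff_T, rs_T, cs_T );
63
64 break;
65 }
66
67 case FLA_DOUBLE:
68 {
69 double* buff_A = FLA_DOUBLE_PTR( A );
70 double* buff_Z = FLA_DOUBLE_PTR( Z );
71 double* buff_T = FLA_DOUBLE_PTR( T );
72
73 FLA_Tridiag_UT_l_step_opd_var3( m_A,
74 m_T,
75 buff_A, rs_A, cs_A,
76 buff_Z, rs_Z, cs_Z,
77 buff_T, rs_T, cs_T );
78
79 break;
80 }
81
82 case FLA_COMPLEX:
83 {
84 scomplex* buff_A = FLA_COMPLEX_PTR( A );
85 scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
86 scomplex* buff_T = FLA_COMPLEX_PTR( T );
87
88 FLA_Tridiag_UT_l_step_opc_var3( m_A,
89 m_T,
90 buff_A, rs_A, cs_A,
91 buff_Z, rs_Z, cs_Z,
92 buff_T, rs_T, cs_T );
93
94 break;
95 }
96
97 case FLA_DOUBLE_COMPLEX:
98 {
99 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
100 dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
101 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
102
103 FLA_Tridiag_UT_l_step_opz_var3( m_A,
104 m_T,
105 buff_A, rs_A, cs_A,
106 buff_Z, rs_Z, cs_Z,
107 buff_T, rs_T, cs_T );
108
109 break;
110 }
111 }
112
113 return FLA_SUCCESS;
114}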
FLA_Error FLA_Tridiag_UT_l_step_opd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:341
FLA_Error FLA_Tridiag_UT_l_step_opc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:564
FLA_Error FLA_Tridiag_UT_l_step_ops_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:118
FLA_Error FLA_Tridiag_UT_l_step_opz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:788
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Tridiag_UT_l_step_ops_var3(), FLA_Tridiag_UT_l_step_opz_var3(), and i.

Referenced by FLA_Tridiag_UT_l_blk_var3(), and FLA_Tridiag_UT_l_opt_var3().

◆ FLA_Tridiag_UT_l_step_opz_var3()

FLA_Error FLA_Tridiag_UT_l_step_opz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
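793{
794 dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
795 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
796 dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
797 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
798
799 dcomplex first_elem, last_elem;
800 dcomplex beta;
801 dcomplex inv_tau11;
802 dcomplex minus_inv_tau11;
803 int i;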
804
805 // b_alg = FLA_Obj_length( T );
806 int b_alg = m_T;
807
808 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
809 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
810 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
811 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
812 int inc_d = 1;
813 int inc_f = 1;
814
815 // FLA_Set( FLA_ZERO, Z );
816 bl1_zsetm( m_A,
817 b_alg,
818 buff_0,
819 buff_Z, rs_Z, cs_Z );
820
821 for ( i = 0; i < b_alg; ++i )
822 {
823 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
824 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
825 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
826 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
827 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
828
829 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
830 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
831 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
832
833 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
834 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
835
836 dcomplex* d01 = buff_d + (0 )*inc_d;
837
838 dcomplex* f01 = buff_f + (0 )*inc_f;
839
840 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
841
842 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
843 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
844
845 dcomplex* ABL = a10t;
846 dcomplex* ZBL = z10t;
847
848 dcomplex* a2 = alpha11;
849
850 int m_ahead = m_A - i - 1;
851 int m_behind = i;
852 int n_behind = i;
853
854 /*------------------------------------------------------------*/
855
856 if ( m_behind > 0 )
857 {
858 // FLA_Copy( a10t_r, last_elem );
859 // FLA_Set( FLA_ONE, a10t_r );
860 last_elem = *a10t_r;
861 *a10t_r = *buff_1;
862 }
863
864 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
865 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
868 m_ahead + 1,
869 n_behind,
870 buff_m1,
871 ABL, rs_A, cs_A,
872 z10t, cs_Z,
873 buff_1,
874 a2, rs_A );
877 m_ahead + 1,
878 n_behind,
879 buff_m1,
880 ZBL, rs_Z, cs_Z,
881 a10t, cs_A,
882 buff_1,
883 a2, rs_A );
884
885 if ( m_behind > 0 )
886 {
887 // FLA_Copy( last_elem, a10t_r );
888 *a10t_r = last_elem;
889 }
890
891 if ( m_ahead > 0 )
892 {
893 // FLA_Househ2_UT( FLA_LEFT,
894 // a21_t,
895 // a21_b, tau11 );
897 a21_t,
898 a21_b, rs_A,
899 tau11 );
900
901 // FLA_Set( FLA_ONE, inv_tau11 );
902 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
903 // FLA_Copy( inv_tau11, minus_inv_tau11 );
904 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
907
908 // FLA_Copy( a21_t, first_elem );
909 // FLA_Set( FLA_ONE, a21_t );
910 first_elem = *a21_t;
911 *a21_t = *buff_1;
912
913 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
916 m_ahead,
917 buff_1,
918 A22, rs_A, cs_A,
919 a21, rs_A,
920 buff_0,
921 z21, rs_Z );
922
923 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
924 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
927 m_ahead,
928 n_behind,
929 buff_1,
930 A20, rs_A, cs_A,
931 a21, rs_A,
932 buff_0,
933 d01, inc_d );
936 m_ahead,
937 n_behind,
938 buff_1,
939 Z20, rs_Z, cs_Z,
940 a21, rs_A,
941 buff_0,
942 f01, inc_f );
943
944 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
945 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
948 m_ahead,
949 n_behind,
950 buff_m1,
951 A20, rs_A, cs_A,
952 f01, inc_f,
953 buff_1,
954 z21, rs_Z );
957 m_ahead,
958 n_behind,
959 buff_m1,
960 Z20, rs_Z, cs_Z,
961 d01, inc_d,
962 buff_1,
963 z21, rs_Z );
964
965 // FLA_Copy( d01, t01 );
967 n_behind,
968 d01, inc_d,
969 t01, rs_T );
970
971 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
972 // FLA_Inv_scal( FLA_TWO, beta );
974 m_ahead,
975 a21, rs_A,
976 z21, rs_Z,
977 &beta );
979
980 // FLA_Scal( minus_inv_tau11, beta );
981 // FLA_Axpy( beta, a21, z21 );
982 // FLA_Scal( inv_tau11, z21 );
985 m_ahead,
986 &beta,
987 a21, rs_A,
988 z21, rs_Z );
990 m_ahead,
991 &inv_tau11,
992 z21, rs_Z );
993
994 // FLA_Copy( first_elem, a21_t );
995 *a21_t = first_elem;
996 }
997
998 /*------------------------------------------------------------*/
999
1000 }
1001
1002 // FLA_Obj_free( &d );
1003 // FLA_Obj_free( &f );
1004 FLA_free( buff_d );
1005 FLA_free( buff_f );
1006
1007 return FLA_SUCCESS;
1008}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_hemv.c:134
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().