libflame revision_anchor
Functions
FLA_Tridiag_UT_l_fus_var2.c File Reference

(r)

Functions

FLA_Error FLA_Tridiag_UT_l_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Tridiag_UT_l_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16}
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_fus_var2.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Tridiag_UT_l_step_ofu_var2(), and i.

◆ FLA_Tridiag_UT_l_step_ofc_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
557{
562
569 int i;
570
571 // b_alg = FLA_Obj_length( T );
572 int b_alg = m_T;
573
574 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
575 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
576 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
577 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
578 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
579 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
580 int inc_u = 1;
581 int inc_z = 1;
582 int inc_w = 1;
583
584 // Initialize some variables (only to prevent compiler warnings).
587
588 for ( i = 0; i < b_alg; ++i )
589 {
590 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
591 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
592 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
593 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
594
595 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
596 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
597
599 scomplex* u21 = buff_u + (i+1)*inc_u;
600
601 scomplex* zeta11 = buff_z + (i )*inc_z;
602 scomplex* z21 = buff_z + (i+1)*inc_z;
603
604 scomplex* w21 = buff_w + (i+1)*inc_w;
605
606 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
607 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
608
609 int m_ahead = m_A - i - 1;
610 int m_behind = i;
611 int n_behind = i;
612
613 /*------------------------------------------------------------*/
614
615 if ( m_behind > 0 )
616 {
617 // FLA_Copy( upsilon11, minus_upsilon11 );
618 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
619 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
622
623 // FLA_Copy( zeta11, minus_zeta11 );
624 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
625 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
628
629 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
630 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
632 1,
634 zeta11, 1,
635 alpha11, 1 );
637 1,
639 upsilon11, 1,
640 alpha11, 1 );
641
642 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
643 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
645 m_ahead,
647 u21, inc_u,
648 a21, rs_A );
650 m_ahead,
652 z21, inc_z,
653 a21, rs_A );
654 }
655
656 if ( m_ahead > 0 )
657 {
658 // FLA_Househ2_UT( FLA_LEFT,
659 // a21_t,
660 // a21_b, tau11 );
662 a21_t,
663 a21_b, rs_A,
664 tau11 );
665
666 // FLA_Set( FLA_ONE, inv_tau11 );
667 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
668 // FLA_Copy( inv_tau11, minus_inv_tau11 );
669 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
672
673 // FLA_Copy( a21_t, first_elem );
674 // FLA_Set( FLA_ONE, a21_t );
675 first_elem = *a21_t;
676 *a21_t = *buff_1;
677 }
678
679 if ( m_behind > 0 && m_ahead > 0 )
680 {
681 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
682 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
684 buff_m1,
685 u21, inc_u,
686 z21, inc_z,
687 A22, rs_A, cs_A,
688 a21, rs_A,
689 w21, inc_w );
690 }
691 else if ( m_ahead > 0 )
692 {
693 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
696 m_ahead,
697 buff_1,
698 A22, rs_A, cs_A,
699 a21, rs_A,
700 buff_0,
701 w21, inc_w );
702 }
703
704 if ( m_ahead > 0 )
705 {
706 // FLA_Copy( a21, u21 );
707 // FLA_Copy( w21, z21 );
709 m_ahead,
710 a21, rs_A,
711 u21, inc_u );
713 m_ahead,
714 w21, inc_w,
715 z21, inc_z );
716
717 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
718 // FLA_Inv_scal( FLA_TWO, beta );
720 m_ahead,
721 a21, rs_A,
722 z21, inc_z,
723 &beta );
725
726 // FLA_Scal( minus_inv_tau11, beta );
727 // FLA_Axpy( beta, a21, z21 );
728 // FLA_Scal( inv_tau11, z21 );
731 m_ahead,
732 &beta,
733 a21, rs_A,
734 z21, inc_z );
736 m_ahead,
737 &inv_tau11,
738 z21, inc_z );
739
740 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
743 m_ahead,
744 n_behind,
745 buff_1,
746 A20, rs_A, cs_A,
747 a21, rs_A,
748 buff_0,
749 t01, rs_T );
750
751 // FLA_Copy( first_elem, a21_t );
752 *a21_t = first_elem;
753 }
754
755 if ( m_behind + 1 == b_alg && m_ahead > 0 )
756 {
757 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
760 m_ahead,
761 buff_m1,
762 u21, inc_u,
763 z21, inc_z,
764 A22, rs_A, cs_A );
765 }
766
767 /*------------------------------------------------------------*/
768
769 }
770
771 // FLA_Obj_free( &u );
772 // FLA_Obj_free( &z );
773 // FLA_Obj_free( &w );
774 FLA_free( buff_u );
775 FLA_free( buff_z );
776 FLA_free( buff_w );
777
778 return FLA_SUCCESS;
779}
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1(int m_A, scomplex *buff_beta, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:329
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_hemv.c:35
void bl1_cher2(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_her2.c:33
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_LOWER_TRIANGULAR
Definition blis_type_defs.h:62
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
scomplex
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofd_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
329{
330 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
331 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
332 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
334
335 double first_elem;
336 double beta;
337 double inv_tau11;
338 double minus_inv_tau11;
341 int i;
342
343 // b_alg = FLA_Obj_length( T );
344 int b_alg = m_T;
345
346 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
347 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
348 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
349 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
350 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
351 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
352 int inc_u = 1;
353 int inc_z = 1;
354 int inc_w = 1;
355
356 // Initialize some variables (only to prevent compiler warnings).
359
360 for ( i = 0; i < b_alg; ++i )
361 {
362 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
363 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
364 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
365 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
366
367 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
368 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
369
370 double* upsilon11= buff_u + (i )*inc_u;
371 double* u21 = buff_u + (i+1)*inc_u;
372
373 double* zeta11 = buff_z + (i )*inc_z;
374 double* z21 = buff_z + (i+1)*inc_z;
375
376 double* w21 = buff_w + (i+1)*inc_w;
377
378 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
379 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
380
381 int m_ahead = m_A - i - 1;
382 int m_behind = i;
383 int n_behind = i;
384
385 /*------------------------------------------------------------*/
386
387 if ( m_behind > 0 )
388 {
389 // FLA_Copy( upsilon11, minus_upsilon11 );
390 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
391 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
394
395 // FLA_Copy( zeta11, minus_zeta11 );
396 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
397 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
400
401 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
402 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
404 1,
406 zeta11, 1,
407 alpha11, 1 );
409 1,
411 upsilon11, 1,
412 alpha11, 1 );
413
414 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
415 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
417 m_ahead,
419 u21, inc_u,
420 a21, rs_A );
422 m_ahead,
424 z21, inc_z,
425 a21, rs_A );
426 }
427
428 if ( m_ahead > 0 )
429 {
430 // FLA_Househ2_UT( FLA_LEFT,
431 // a21_t,
432 // a21_b, tau11 );
434 a21_t,
435 a21_b, rs_A,
436 tau11 );
437
438 // FLA_Set( FLA_ONE, inv_tau11 );
439 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
440 // FLA_Copy( inv_tau11, minus_inv_tau11 );
441 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
444
445 // FLA_Copy( a21_t, first_elem );
446 // FLA_Set( FLA_ONE, a21_t );
447 first_elem = *a21_t;
448 *a21_t = *buff_1;
449 }
450
451 if ( m_behind > 0 && m_ahead > 0 )
452 {
453 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
454 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
456 buff_m1,
457 u21, inc_u,
458 z21, inc_z,
459 A22, rs_A, cs_A,
460 a21, rs_A,
461 w21, inc_w );
462 }
463 else if ( m_ahead > 0 )
464 {
465 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
467 m_ahead,
468 buff_1,
469 A22, rs_A, cs_A,
470 a21, rs_A,
471 buff_0,
472 w21, inc_w );
473 }
474
475 if ( m_ahead > 0 )
476 {
477 // FLA_Copy( a21, u21 );
478 // FLA_Copy( w21, z21 );
480 m_ahead,
481 a21, rs_A,
482 u21, inc_u );
484 m_ahead,
485 w21, inc_w,
486 z21, inc_z );
487
488 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
489 // FLA_Inv_scal( FLA_TWO, beta );
491 m_ahead,
492 a21, rs_A,
493 z21, inc_z,
494 &beta );
496
497 // FLA_Scal( minus_inv_tau11, beta );
498 // FLA_Axpy( beta, a21, z21 );
499 // FLA_Scal( inv_tau11, z21 );
502 m_ahead,
503 &beta,
504 a21, rs_A,
505 z21, inc_z );
507 m_ahead,
508 &inv_tau11,
509 z21, inc_z );
510
511 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
514 m_ahead,
515 n_behind,
516 buff_1,
517 A20, rs_A, cs_A,
518 a21, rs_A,
519 buff_0,
520 t01, rs_T );
521
522 // FLA_Copy( first_elem, a21_t );
523 *a21_t = first_elem;
524 }
525
526 if ( m_behind + 1 == b_alg && m_ahead > 0 )
527 {
528 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
530 m_ahead,
531 buff_m1,
532 u21, inc_u,
533 z21, inc_z,
534 A22, rs_A, cs_A );
535 }
536
537 /*------------------------------------------------------------*/
538
539 }
540
541 // FLA_Obj_free( &u );
542 // FLA_Obj_free( &z );
543 // FLA_Obj_free( &w );
544 FLA_free( buff_u );
545 FLA_free( buff_z );
546 FLA_free( buff_w );
547
548 return FLA_SUCCESS;
549}
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1(int m_A, double *buff_beta, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:246
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_symv.c:56
void bl1_dsyr2(uplo1_t uplo, int m, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_syr2.c:58

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofs_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float beta;
109 float inv_tau11;
110 float minus_inv_tau11;
113 int i;
114
115 // b_alg = FLA_Obj_length( T );
116 int b_alg = m_T;
117
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
119 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
121 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
122 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
123 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
124 int inc_u = 1;
125 int inc_z = 1;
126 int inc_w = 1;
127
128 // Initialize some variables (only to prevent compiler warnings).
131
132 for ( i = 0; i < b_alg; ++i )
133 {
134 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
135 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
136 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
137 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138
139 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
140 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
141
142 float* upsilon11= buff_u + (i )*inc_u;
143 float* u21 = buff_u + (i+1)*inc_u;
144
145 float* zeta11 = buff_z + (i )*inc_z;
146 float* z21 = buff_z + (i+1)*inc_z;
147
148 float* w21 = buff_w + (i+1)*inc_w;
149
150 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
151 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
152
153 int m_ahead = m_A - i - 1;
154 int m_behind = i;
155 int n_behind = i;
156
157 /*------------------------------------------------------------*/
158
159 if ( m_behind > 0 )
160 {
161 // FLA_Copy( upsilon11, minus_upsilon11 );
162 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
163 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
166
167 // FLA_Copy( zeta11, minus_zeta11 );
168 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
169 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
172
173 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
174 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
176 1,
178 zeta11, 1,
179 alpha11, 1 );
181 1,
183 upsilon11, 1,
184 alpha11, 1 );
185
186 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
187 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
189 m_ahead,
191 u21, inc_u,
192 a21, rs_A );
194 m_ahead,
196 z21, inc_z,
197 a21, rs_A );
198 }
199
200 if ( m_ahead > 0 )
201 {
202 // FLA_Househ2_UT( FLA_LEFT,
203 // a21_t,
204 // a21_b, tau11 );
206 a21_t,
207 a21_b, rs_A,
208 tau11 );
209
210 // FLA_Set( FLA_ONE, inv_tau11 );
211 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
212 // FLA_Copy( inv_tau11, minus_inv_tau11 );
213 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
216
217 // FLA_Copy( a21_t, first_elem );
218 // FLA_Set( FLA_ONE, a21_t );
219 first_elem = *a21_t;
220 *a21_t = *buff_1;
221 }
222
223 if ( m_behind > 0 && m_ahead > 0 )
224 {
225 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
226 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
228 buff_m1,
229 u21, inc_u,
230 z21, inc_z,
231 A22, rs_A, cs_A,
232 a21, rs_A,
233 w21, inc_w );
234 }
235 else if ( m_ahead > 0 )
236 {
237 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
239 m_ahead,
240 buff_1,
241 A22, rs_A, cs_A,
242 a21, rs_A,
243 buff_0,
244 w21, inc_w );
245 }
246
247 if ( m_ahead > 0 )
248 {
249 // FLA_Copy( a21, u21 );
250 // FLA_Copy( w21, z21 );
252 m_ahead,
253 a21, rs_A,
254 u21, inc_u );
256 m_ahead,
257 w21, inc_w,
258 z21, inc_z );
259
260 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
261 // FLA_Inv_scal( FLA_TWO, beta );
263 m_ahead,
264 a21, rs_A,
265 z21, inc_z,
266 &beta );
268
269 // FLA_Scal( minus_inv_tau11, beta );
270 // FLA_Axpy( beta, a21, z21 );
271 // FLA_Scal( inv_tau11, z21 );
274 m_ahead,
275 &beta,
276 a21, rs_A,
277 z21, inc_z );
279 m_ahead,
280 &inv_tau11,
281 z21, inc_z );
282
283 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
286 m_ahead,
287 n_behind,
288 buff_1,
289 A20, rs_A, cs_A,
290 a21, rs_A,
291 buff_0,
292 t01, rs_T );
293
294 // FLA_Copy( first_elem, a21_t );
295 *a21_t = first_elem;
296 }
297
298 if ( m_behind + 1 == b_alg && m_ahead > 0 )
299 {
300 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
302 m_ahead,
303 buff_m1,
304 u21, inc_u,
305 z21, inc_z,
306 A22, rs_A, cs_A );
307 }
308
309 /*------------------------------------------------------------*/
310
311 }
312
313 // FLA_Obj_free( &u );
314 // FLA_Obj_free( &z );
315 // FLA_Obj_free( &w );
316 FLA_free( buff_u );
317 FLA_free( buff_z );
318 FLA_free( buff_w );
319
320 return FLA_SUCCESS;
321}
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1(int m_A, float *buff_beta, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:126
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_symv.c:13
void bl1_ssyr2(uplo1_t uplo, int m, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_syr2.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:325
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:97
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:783
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:553
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
dcomplex
Definition blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofz_var2(), and i.

Referenced by FLA_Tridiag_UT_l_blf_var2(), and FLA_Tridiag_UT_l_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofz_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex buff_A,
int  rs_A,
int  cs_A,
dcomplex buff_T,
int  rs_T,
int  cs_T 
)
787{
792
799 int i;
800
801 // b_alg = FLA_Obj_length( T );
802 int b_alg = m_T;
803
804 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
805 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
806 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
807 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
808 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
809 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
810 int inc_u = 1;
811 int inc_z = 1;
812 int inc_w = 1;
813
814 // Initialize some variables (only to prevent compiler warnings).
817
818 for ( i = 0; i < b_alg; ++i )
819 {
820 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
821 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
822 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
823 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
824
825 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
826 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
827
829 dcomplex* u21 = buff_u + (i+1)*inc_u;
830
831 dcomplex* zeta11 = buff_z + (i )*inc_z;
832 dcomplex* z21 = buff_z + (i+1)*inc_z;
833
834 dcomplex* w21 = buff_w + (i+1)*inc_w;
835
836 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
837 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
838
839 int m_ahead = m_A - i - 1;
840 int m_behind = i;
841 int n_behind = i;
842
843 /*------------------------------------------------------------*/
844
845 if ( m_behind > 0 )
846 {
847 // FLA_Copy( upsilon11, minus_upsilon11 );
848 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
849 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
852
853 // FLA_Copy( zeta11, minus_zeta11 );
854 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
855 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
858
859 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
860 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
862 1,
864 zeta11, 1,
865 alpha11, 1 );
867 1,
869 upsilon11, 1,
870 alpha11, 1 );
871
872 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
873 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
875 m_ahead,
877 u21, inc_u,
878 a21, rs_A );
880 m_ahead,
882 z21, inc_z,
883 a21, rs_A );
884 }
885
886 if ( m_ahead > 0 )
887 {
888 // FLA_Househ2_UT( FLA_LEFT,
889 // a21_t,
890 // a21_b, tau11 );
892 a21_t,
893 a21_b, rs_A,
894 tau11 );
895
896 // FLA_Set( FLA_ONE, inv_tau11 );
897 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
898 // FLA_Copy( inv_tau11, minus_inv_tau11 );
899 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
902
903 // FLA_Copy( a21_t, first_elem );
904 // FLA_Set( FLA_ONE, a21_t );
905 first_elem = *a21_t;
906 *a21_t = *buff_1;
907 }
908
909 if ( m_behind > 0 && m_ahead > 0 )
910 {
911 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
912 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
914 buff_m1,
915 u21, inc_u,
916 z21, inc_z,
917 A22, rs_A, cs_A,
918 a21, rs_A,
919 w21, inc_w );
920 }
921 else if ( m_ahead > 0 )
922 {
923 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
926 m_ahead,
927 buff_1,
928 A22, rs_A, cs_A,
929 a21, rs_A,
930 buff_0,
931 w21, inc_w );
932 }
933
934 if ( m_ahead > 0 )
935 {
936 // FLA_Copy( a21, u21 );
937 // FLA_Copy( w21, z21 );
939 m_ahead,
940 a21, rs_A,
941 u21, inc_u );
943 m_ahead,
944 w21, inc_w,
945 z21, inc_z );
946
947 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
948 // FLA_Inv_scal( FLA_TWO, beta );
950 m_ahead,
951 a21, rs_A,
952 z21, inc_z,
953 &beta );
955
956 // FLA_Scal( minus_inv_tau11, beta );
957 // FLA_Axpy( beta, a21, z21 );
958 // FLA_Scal( inv_tau11, z21 );
961 m_ahead,
962 &beta,
963 a21, rs_A,
964 z21, inc_z );
966 m_ahead,
967 &inv_tau11,
968 z21, inc_z );
969
970 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
973 m_ahead,
974 n_behind,
975 buff_1,
976 A20, rs_A, cs_A,
977 a21, rs_A,
978 buff_0,
979 t01, rs_T );
980
981 // FLA_Copy( first_elem, a21_t );
982 *a21_t = first_elem;
983 }
984
985 if ( m_behind + 1 == b_alg && m_ahead > 0 )
986 {
987 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
990 m_ahead,
991 buff_m1,
992 u21, inc_u,
993 z21, inc_z,
994 A22, rs_A, cs_A );
995 }
996
997 /*------------------------------------------------------------*/
998
999 }
1000
1001 // FLA_Obj_free( &u );
1002 // FLA_Obj_free( &z );
1003 // FLA_Obj_free( &w );
1004 FLA_free( buff_u );
1005 FLA_free( buff_z );
1006 FLA_free( buff_w );
1007
1008 return FLA_SUCCESS;
1009}
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1(int m_A, dcomplex *buff_beta, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:450
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_hemv.c:134
void bl1_zher2(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_her2.c:121
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().