libflame revision_anchor
Functions
FLA_Tridiag_UT_l_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Tridiag_UT_l_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Tridiag_UT_l_opt_var2()

FLA_Error FLA_Tridiag_UT_l_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16}
FLA_Error FLA_Tridiag_UT_l_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var2.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Tridiag_UT_l_step_opt_var2(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_step_opc_var2()

FLA_Error FLA_Tridiag_UT_l_step_opc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
549{
554
561 int i;
562
563 // b_alg = FLA_Obj_length( T );
564 int b_alg = m_T;
565
566 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
567 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
568 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
569 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
570 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
571 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
572 int inc_u = 1;
573 int inc_z = 1;
574 int inc_w = 1;
575
576 // Initialize some variables (only to prevent compiler warnings).
579
580 for ( i = 0; i < b_alg; ++i )
581 {
582 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
583 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
584 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
585 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
586
587 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
588 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
589
591 scomplex* u21 = buff_u + (i+1)*inc_u;
592
593 scomplex* zeta11 = buff_z + (i )*inc_z;
594 scomplex* z21 = buff_z + (i+1)*inc_z;
595
596 scomplex* w21 = buff_w + (i+1)*inc_w;
597
598 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
599 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
600
601 int m_ahead = m_A - i - 1;
602 int m_behind = i;
603 int n_behind = i;
604
605 /*------------------------------------------------------------*/
606
607 if ( m_behind > 0 )
608 {
609 // FLA_Copy( upsilon11, minus_upsilon11 );
610 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
611 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
614
615 // FLA_Copy( zeta11, minus_zeta11 );
616 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
617 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
620
621 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
622 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
624 1,
626 zeta11, 1,
627 alpha11, 1 );
629 1,
631 upsilon11, 1,
632 alpha11, 1 );
633
634 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
635 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
637 m_ahead,
639 u21, inc_u,
640 a21, rs_A );
642 m_ahead,
644 z21, inc_z,
645 a21, rs_A );
646 }
647
648 if ( m_ahead > 0 )
649 {
650 // FLA_Househ2_UT( FLA_LEFT,
651 // a21_t,
652 // a21_b, tau11 );
654 a21_t,
655 a21_b, rs_A,
656 tau11 );
657
658 // FLA_Set( FLA_ONE, inv_tau11 );
659 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
660 // FLA_Copy( inv_tau11, minus_inv_tau11 );
661 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
664
665 // FLA_Copy( a21_t, first_elem );
666 // FLA_Set( FLA_ONE, a21_t );
667 first_elem = *a21_t;
668 *a21_t = *buff_1;
669 }
670
671 if ( m_behind > 0 )
672 {
673 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
676 m_ahead,
677 buff_m1,
678 u21, inc_u,
679 z21, inc_z,
680 A22, rs_A, cs_A );
681 }
682
683 if ( m_ahead > 0 )
684 {
685 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
688 m_ahead,
689 buff_1,
690 A22, rs_A, cs_A,
691 a21, rs_A,
692 buff_0,
693 w21, inc_w );
694
695 // FLA_Copy( a21, u21 );
696 // FLA_Copy( w21, z21 );
698 m_ahead,
699 a21, rs_A,
700 u21, inc_u );
702 m_ahead,
703 w21, inc_w,
704 z21, inc_z );
705
706 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
707 // FLA_Inv_scal( FLA_TWO, beta );
709 m_ahead,
710 a21, rs_A,
711 z21, inc_z,
712 &beta );
714
715 // FLA_Scal( minus_inv_tau11, beta );
716 // FLA_Axpy( beta, a21, z21 );
717 // FLA_Scal( inv_tau11, z21 );
720 m_ahead,
721 &beta,
722 a21, rs_A,
723 z21, inc_z );
725 m_ahead,
726 &inv_tau11,
727 z21, inc_z );
728
729 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
732 m_ahead,
733 n_behind,
734 buff_1,
735 A20, rs_A, cs_A,
736 a21, rs_A,
737 buff_0,
738 t01, rs_T );
739
740 // FLA_Copy( first_elem, a21_t );
741 *a21_t = first_elem;
742 }
743
744 if ( m_behind + 1 == b_alg && m_ahead > 0 )
745 {
746 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
749 m_ahead,
750 buff_m1,
751 u21, inc_u,
752 z21, inc_z,
753 A22, rs_A, cs_A );
754 }
755
756 /*------------------------------------------------------------*/
757
758 }
759
760 // FLA_Obj_free( &u );
761 // FLA_Obj_free( &z );
762 // FLA_Obj_free( &w );
763 FLA_free( buff_u );
764 FLA_free( buff_z );
765 FLA_free( buff_w );
766
767 return FLA_SUCCESS;
768}
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_hemv.c:35
void bl1_cher2(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_her2.c:33
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_LOWER_TRIANGULAR
Definition blis_type_defs.h:62
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opd_var2()

FLA_Error FLA_Tridiag_UT_l_step_opd_var2 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
325{
326 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
327 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
328 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
330
331 double first_elem;
332 double beta;
333 double inv_tau11;
334 double minus_inv_tau11;
337 int i;
338
339 // b_alg = FLA_Obj_length( T );
340 int b_alg = m_T;
341
342 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
343 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
344 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
345 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
346 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
347 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
348 int inc_u = 1;
349 int inc_z = 1;
350 int inc_w = 1;
351
352 // Initialize some variables (only to prevent compiler warnings).
355
356 for ( i = 0; i < b_alg; ++i )
357 {
358 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
359 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
360 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
361 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
362
363 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
364 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
365
366 double* upsilon11= buff_u + (i )*inc_u;
367 double* u21 = buff_u + (i+1)*inc_u;
368
369 double* zeta11 = buff_z + (i )*inc_z;
370 double* z21 = buff_z + (i+1)*inc_z;
371
372 double* w21 = buff_w + (i+1)*inc_w;
373
374 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
375 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
376
377 int m_ahead = m_A - i - 1;
378 int m_behind = i;
379 int n_behind = i;
380
381 /*------------------------------------------------------------*/
382
383 if ( m_behind > 0 )
384 {
385 // FLA_Copy( upsilon11, minus_upsilon11 );
386 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
387 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
390
391 // FLA_Copy( zeta11, minus_zeta11 );
392 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
393 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
396
397 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
398 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
400 1,
402 zeta11, 1,
403 alpha11, 1 );
405 1,
407 upsilon11, 1,
408 alpha11, 1 );
409
410 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
411 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
413 m_ahead,
415 u21, inc_u,
416 a21, rs_A );
418 m_ahead,
420 z21, inc_z,
421 a21, rs_A );
422 }
423
424 if ( m_ahead > 0 )
425 {
426 // FLA_Househ2_UT( FLA_LEFT,
427 // a21_t,
428 // a21_b, tau11 );
430 a21_t,
431 a21_b, rs_A,
432 tau11 );
433
434 // FLA_Set( FLA_ONE, inv_tau11 );
435 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
436 // FLA_Copy( inv_tau11, minus_inv_tau11 );
437 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
440
441 // FLA_Copy( a21_t, first_elem );
442 // FLA_Set( FLA_ONE, a21_t );
443 first_elem = *a21_t;
444 *a21_t = *buff_1;
445 }
446
447 if ( m_behind > 0 )
448 {
449 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
451 m_ahead,
452 buff_m1,
453 u21, inc_u,
454 z21, inc_z,
455 A22, rs_A, cs_A );
456 }
457
458 if ( m_ahead > 0 )
459 {
460 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
462 m_ahead,
463 buff_1,
464 A22, rs_A, cs_A,
465 a21, rs_A,
466 buff_0,
467 w21, inc_w );
468
469 // FLA_Copy( a21, u21 );
470 // FLA_Copy( w21, z21 );
472 m_ahead,
473 a21, rs_A,
474 u21, inc_u );
476 m_ahead,
477 w21, inc_w,
478 z21, inc_z );
479
480 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
481 // FLA_Inv_scal( FLA_TWO, beta );
483 m_ahead,
484 a21, rs_A,
485 z21, inc_z,
486 &beta );
488
489 // FLA_Scal( minus_inv_tau11, beta );
490 // FLA_Axpy( beta, a21, z21 );
491 // FLA_Scal( inv_tau11, z21 );
494 m_ahead,
495 &beta,
496 a21, rs_A,
497 z21, inc_z );
499 m_ahead,
500 &inv_tau11,
501 z21, inc_z );
502
503 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
506 m_ahead,
507 n_behind,
508 buff_1,
509 A20, rs_A, cs_A,
510 a21, rs_A,
511 buff_0,
512 t01, rs_T );
513
514 // FLA_Copy( first_elem, a21_t );
515 *a21_t = first_elem;
516 }
517
518 if ( m_behind + 1 == b_alg && m_ahead > 0 )
519 {
520 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
522 m_ahead,
523 buff_m1,
524 u21, inc_u,
525 z21, inc_z,
526 A22, rs_A, cs_A );
527 }
528
529 /*------------------------------------------------------------*/
530
531 }
532
533 // FLA_Obj_free( &u );
534 // FLA_Obj_free( &z );
535 // FLA_Obj_free( &w );
536 FLA_free( buff_u );
537 FLA_free( buff_z );
538 FLA_free( buff_w );
539
540 return FLA_SUCCESS;
541}
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_symv.c:56
void bl1_dsyr2(uplo1_t uplo, int m, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_syr2.c:58

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_ops_var2()

FLA_Error FLA_Tridiag_UT_l_step_ops_var2 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float beta;
109 float inv_tau11;
110 float minus_inv_tau11;
113 int i;
114
115 // b_alg = FLA_Obj_length( T );
116 int b_alg = m_T;
117
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
119 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
121 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
122 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
123 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
124 int inc_u = 1;
125 int inc_z = 1;
126 int inc_w = 1;
127
128 // Initialize some variables (only to prevent compiler warnings).
131
132 for ( i = 0; i < b_alg; ++i )
133 {
134 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
135 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
136 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
137 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138
139 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
140 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
141
142 float* upsilon11= buff_u + (i )*inc_u;
143 float* u21 = buff_u + (i+1)*inc_u;
144
145 float* zeta11 = buff_z + (i )*inc_z;
146 float* z21 = buff_z + (i+1)*inc_z;
147
148 float* w21 = buff_w + (i+1)*inc_w;
149
150 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
151 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
152
153 int m_ahead = m_A - i - 1;
154 int m_behind = i;
155 int n_behind = i;
156
157 /*------------------------------------------------------------*/
158
159 if ( m_behind > 0 )
160 {
161 // FLA_Copy( upsilon11, minus_upsilon11 );
162 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
163 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
166
167 // FLA_Copy( zeta11, minus_zeta11 );
168 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
169 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
172
173 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
174 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
176 1,
178 zeta11, 1,
179 alpha11, 1 );
181 1,
183 upsilon11, 1,
184 alpha11, 1 );
185
186 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
187 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
189 m_ahead,
191 u21, inc_u,
192 a21, rs_A );
194 m_ahead,
196 z21, inc_z,
197 a21, rs_A );
198 }
199
200 if ( m_ahead > 0 )
201 {
202 // FLA_Househ2_UT( FLA_LEFT,
203 // a21_t,
204 // a21_b, tau11 );
206 a21_t,
207 a21_b, rs_A,
208 tau11 );
209
210 // FLA_Set( FLA_ONE, inv_tau11 );
211 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
212 // FLA_Copy( inv_tau11, minus_inv_tau11 );
213 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
216
217 // FLA_Copy( a21_t, first_elem );
218 // FLA_Set( FLA_ONE, a21_t );
219 first_elem = *a21_t;
220 *a21_t = *buff_1;
221 }
222
223 if ( m_behind > 0 )
224 {
225 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
227 m_ahead,
228 buff_m1,
229 u21, inc_u,
230 z21, inc_z,
231 A22, rs_A, cs_A );
232 }
233
234 if ( m_ahead > 0 )
235 {
236 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
238 m_ahead,
239 buff_1,
240 A22, rs_A, cs_A,
241 a21, rs_A,
242 buff_0,
243 w21, inc_w );
244
245 // FLA_Copy( a21, u21 );
246 // FLA_Copy( w21, z21 );
248 m_ahead,
249 a21, rs_A,
250 u21, inc_u );
252 m_ahead,
253 w21, inc_w,
254 z21, inc_z );
255
256 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
257 // FLA_Inv_scal( FLA_TWO, beta );
259 m_ahead,
260 a21, rs_A,
261 z21, inc_z,
262 &beta );
264
265 // FLA_Scal( minus_inv_tau11, beta );
266 // FLA_Axpy( beta, a21, z21 );
267 // FLA_Scal( inv_tau11, z21 );
270 m_ahead,
271 &beta,
272 a21, rs_A,
273 z21, inc_z );
275 m_ahead,
276 &inv_tau11,
277 z21, inc_z );
278
279 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
282 m_ahead,
283 n_behind,
284 buff_1,
285 A20, rs_A, cs_A,
286 a21, rs_A,
287 buff_0,
288 t01, rs_T );
289
290 // FLA_Copy( first_elem, a21_t );
291 *a21_t = first_elem;
292 }
293
294 if ( m_behind + 1 == b_alg && m_ahead > 0 )
295 {
296 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
298 m_ahead,
299 buff_m1,
300 u21, inc_u,
301 z21, inc_z,
302 A22, rs_A, cs_A );
303 }
304
305 /*------------------------------------------------------------*/
306
307 }
308
309 // FLA_Obj_free( &u );
310 // FLA_Obj_free( &z );
311 // FLA_Obj_free( &w );
312 FLA_free( buff_u );
313 FLA_free( buff_z );
314 FLA_free( buff_w );
315
316 return FLA_SUCCESS;
317}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_symv.c:13
void bl1_ssyr2(uplo1_t uplo, int m, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_syr2.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opt_var2()

FLA_Error FLA_Tridiag_UT_l_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Tridiag_UT_l_step_opc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:545
FLA_Error FLA_Tridiag_UT_l_step_opd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:321
FLA_Error FLA_Tridiag_UT_l_step_ops_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:97
FLA_Error FLA_Tridiag_UT_l_step_opz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:772
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_opz_var2(), and i.

Referenced by FLA_Tridiag_UT_l_blk_var2(), and FLA_Tridiag_UT_l_opt_var2().

◆ FLA_Tridiag_UT_l_step_opz_var2()

FLA_Error FLA_Tridiag_UT_l_step_opz_var2 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
776{
781
788 int i;
789
790 // b_alg = FLA_Obj_length( T );
791 int b_alg = m_T;
792
793 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
794 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
795 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
796 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
797 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
798 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
799 int inc_u = 1;
800 int inc_z = 1;
801 int inc_w = 1;
802
803 // Initialize some variables (only to prevent compiler warnings).
806
807 for ( i = 0; i < b_alg; ++i )
808 {
809 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
810 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
811 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
812 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
813
814 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
815 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
816
818 dcomplex* u21 = buff_u + (i+1)*inc_u;
819
820 dcomplex* zeta11 = buff_z + (i )*inc_z;
821 dcomplex* z21 = buff_z + (i+1)*inc_z;
822
823 dcomplex* w21 = buff_w + (i+1)*inc_w;
824
825 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
826 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
827
828 int m_ahead = m_A - i - 1;
829 int m_behind = i;
830 int n_behind = i;
831
832 /*------------------------------------------------------------*/
833
834 if ( m_behind > 0 )
835 {
836 // FLA_Copy( upsilon11, minus_upsilon11 );
837 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
838 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
841
842 // FLA_Copy( zeta11, minus_zeta11 );
843 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
844 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
847
848 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
849 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
851 1,
853 zeta11, 1,
854 alpha11, 1 );
856 1,
858 upsilon11, 1,
859 alpha11, 1 );
860
861 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
862 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
864 m_ahead,
866 u21, inc_u,
867 a21, rs_A );
869 m_ahead,
871 z21, inc_z,
872 a21, rs_A );
873 }
874
875 if ( m_ahead > 0 )
876 {
877 // FLA_Househ2_UT( FLA_LEFT,
878 // a21_t,
879 // a21_b, tau11 );
881 a21_t,
882 a21_b, rs_A,
883 tau11 );
884
885 // FLA_Set( FLA_ONE, inv_tau11 );
886 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
887 // FLA_Copy( inv_tau11, minus_inv_tau11 );
888 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
891
892 // FLA_Copy( a21_t, first_elem );
893 // FLA_Set( FLA_ONE, a21_t );
894 first_elem = *a21_t;
895 *a21_t = *buff_1;
896 }
897
898 if ( m_behind > 0 )
899 {
900 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
903 m_ahead,
904 buff_m1,
905 u21, inc_u,
906 z21, inc_z,
907 A22, rs_A, cs_A );
908 }
909
910 if ( m_ahead > 0 )
911 {
912 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
915 m_ahead,
916 buff_1,
917 A22, rs_A, cs_A,
918 a21, rs_A,
919 buff_0,
920 w21, inc_w );
921
922 // FLA_Copy( a21, u21 );
923 // FLA_Copy( w21, z21 );
925 m_ahead,
926 a21, rs_A,
927 u21, inc_u );
929 m_ahead,
930 w21, inc_w,
931 z21, inc_z );
932
933 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
934 // FLA_Inv_scal( FLA_TWO, beta );
936 m_ahead,
937 a21, rs_A,
938 z21, inc_z,
939 &beta );
941
942 // FLA_Scal( minus_inv_tau11, beta );
943 // FLA_Axpy( beta, a21, z21 );
944 // FLA_Scal( inv_tau11, z21 );
947 m_ahead,
948 &beta,
949 a21, rs_A,
950 z21, inc_z );
952 m_ahead,
953 &inv_tau11,
954 z21, inc_z );
955
956 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
959 m_ahead,
960 n_behind,
961 buff_1,
962 A20, rs_A, cs_A,
963 a21, rs_A,
964 buff_0,
965 t01, rs_T );
966
967 // FLA_Copy( first_elem, a21_t );
968 *a21_t = first_elem;
969 }
970
971 if ( m_behind + 1 == b_alg && m_ahead > 0 )
972 {
973 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
976 m_ahead,
977 buff_m1,
978 u21, inc_u,
979 z21, inc_z,
980 A22, rs_A, cs_A );
981 }
982
983 /*------------------------------------------------------------*/
984
985 }
986
987 // FLA_Obj_free( &u );
988 // FLA_Obj_free( &z );
989 // FLA_Obj_free( &w );
990 FLA_free( buff_u );
991 FLA_free( buff_z );
992 FLA_free( buff_w );
993
994 return FLA_SUCCESS;
995}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_hemv.c:134
void bl1_zher2(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_her2.c:121
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().