libflame revision_anchor
Functions
FLA_Hess_UT_fus_var4.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_ofu_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_ofu_var4()

FLA_Error FLA_Hess_UT_ofu_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Y, Z;
17
20
22
23 FLA_Obj_free( &Y );
24 FLA_Obj_free( &Z );
25
26 return r_val;
27}
FLA_Error FLA_Hess_UT_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition FLA_Hess_UT_fus_var4.c:29
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition FLA_Obj.c:286
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
int FLA_Error
Definition FLA_type_defs.h:47
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Hess_UT_step_ofu_var4(), FLA_Obj_create_conf_to(), FLA_Obj_free(), and i.

◆ FLA_Hess_UT_step_ofc_var4()

FLA_Error FLA_Hess_UT_step_ofc_var4 ( int  m_A,
int  m_T,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *buff_T,
int  rs_T,
int  cs_T 
)
688{
693
699 int i;
700
701 // b_alg = FLA_Obj_length( T );
702 int b_alg = m_T;
703
704 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
705 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
706 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
707 scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
708 int inc_e = 1;
709
710 // FLA_Set( FLA_ZERO, Y );
711 // FLA_Set( FLA_ZERO, Z );
712 bl1_csetm( m_A,
713 b_alg,
714 buff_0,
715 buff_Y, rs_Y, cs_Y );
716 bl1_csetm( m_A,
717 b_alg,
718 buff_0,
719 buff_Z, rs_Z, cs_Z );
720
721 for ( i = 0; i < b_alg; ++i )
722 {
723 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
724 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
725 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
726 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
727 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
728 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
729 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
730
731 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
732 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
733 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
734
735 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
736 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
737 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
738
739 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
740 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
741
742 scomplex* e0 = buff_e + (0 )*inc_e;
743
744 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
745
746 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
747 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
748
749 scomplex* ABL = a10t;
750 scomplex* ZBL = z10t;
751
753
754 int m_ahead = m_A - i - 1;
755 int n_ahead = m_A - i - 1;
756 int m_behind = i;
757 int n_behind = i;
758
759 /*------------------------------------------------------------*/
760
761 if ( m_behind > 0 )
762 {
763 // FLA_Copy( a10t_r, last_elem );
764 // FLA_Set( FLA_ONE, a10t_r );
765 last_elem = *a10t_r;
766 *a10t_r = *buff_1;
767 }
768
769 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
770 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
773 m_ahead + 1,
774 n_behind,
775 buff_m1,
776 ABL, rs_A, cs_A,
777 y10t, cs_Y,
778 buff_1,
779 a2, rs_A );
782 m_ahead + 1,
783 n_behind,
784 buff_m1,
785 ZBL, rs_Z, cs_Z,
786 a10t, cs_A,
787 buff_1,
788 a2, rs_A );
789
790 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
791 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
794 m_ahead,
795 n_behind,
796 buff_m1,
797 Y20, rs_Y, cs_Y,
798 a10t, cs_A,
799 buff_1,
800 a12t, cs_A );
803 m_ahead,
804 n_behind,
805 buff_m1,
806 A20, rs_A, cs_A,
807 z10t, cs_Z,
808 buff_1,
809 a12t, cs_A );
810
811 if ( m_behind > 0 )
812 {
813 // FLA_Copy( last_elem, a10t_r );
814 *a10t_r = last_elem;
815 }
816
817 if ( m_ahead > 0 )
818 {
819 // FLA_Househ2_UT( FLA_LEFT,
820 // a21_t,
821 // a21_b, tau11 );
823 a21_t,
824 a21_b, rs_A,
825 tau11 );
826
827 // FLA_Set( FLA_ONE, inv_tau11 );
828 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
829 // FLA_Copy( inv_tau11, minus_inv_tau11 );
830 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
833
834 // FLA_Copy( a21_t, first_elem );
835 // FLA_Set( FLA_ONE, a21_t );
836 first_elem = *a21_t;
837 *a21_t = *buff_1;
838
839 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
840 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
842 n_ahead,
843 A22, rs_A, cs_A,
844 a21, rs_A,
845 y21, rs_Y,
846 z21, rs_Z );
847
848 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
849 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
850 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
851 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
852 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
853 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
854 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
855 // FLA_Copy( d0, t01 );
857 n_behind,
858 buff_m1,
859 A20, rs_A, cs_A,
860 Y20, rs_Y, cs_Y,
861 Z20, rs_Z, cs_Z,
862 t01, rs_T,
863 a21, rs_A,
864 y21, rs_Y,
865 z21, rs_Z );
866
867 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
868 // FLA_Inv_scal( FLA_TWO, beta );
869 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
871 m_ahead,
872 a21, rs_A,
873 z21, rs_Z,
874 &beta );
877
878 // FLA_Scal( minus_inv_tau11, conj_beta );
879 // FLA_Axpy( conj_beta, a21, y21 );
880 // FLA_Scal( inv_tau11, y21 );
883 m_ahead,
884 &conj_beta,
885 a21, rs_A,
886 y21, rs_Y );
888 m_ahead,
889 &inv_tau11,
890 y21, rs_Y );
891
892 // FLA_Scal( minus_inv_tau11, beta );
893 // FLA_Axpy( beta, a21, z21 );
894 // FLA_Scal( inv_tau11, z21 );
897 m_ahead,
898 &beta,
899 a21, rs_A,
900 z21, rs_Z );
902 m_ahead,
903 &inv_tau11,
904 z21, rs_Z );
905
906 // FLA_Dot( a12t, a21, dot_product );
907 // FLA_Scal( minus_inv_tau11, dot_product );
908 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
910 m_ahead,
911 a12t, cs_A,
912 a21, rs_A,
913 &dot_product );
916 m_ahead,
918 a21, rs_A,
919 a12t, cs_A );
920
921 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
922 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
925 m_behind,
926 n_ahead,
927 buff_1,
928 A02, rs_A, cs_A,
929 a21, rs_A,
930 buff_0,
931 e0, inc_e );
934 m_behind,
935 n_ahead,
937 e0, inc_e,
938 a21, rs_A,
939 A02, rs_A, cs_A );
940
941 // FLA_Copy( first_elem, a21_t );
942 *a21_t = first_elem;
943 }
944
945 /*------------------------------------------------------------*/
946
947 }
948
949 // FLA_Obj_free( &e );
950 FLA_free( buff_e );
951
952 return FLA_SUCCESS;
953}
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:398
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofd_var4()

FLA_Error FLA_Hess_UT_step_ofd_var4 ( int  m_A,
int  m_T,
double *buff_A,
int  rs_A,
int  cs_A,
double *buff_Y,
int  rs_Y,
int  cs_Y,
double *buff_Z,
int  rs_Z,
int  cs_Z,
double *buff_T,
int  rs_T,
int  cs_T 
)
413{
414 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
415 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
416 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
418
419 double first_elem, last_elem;
420 double dot_product;
421 double beta, conj_beta;
422 double inv_tau11;
423 double minus_inv_tau11;
424 int i;
425
426 // b_alg = FLA_Obj_length( T );
427 int b_alg = m_T;
428
429 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
430 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
431 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
432 double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
433 int inc_e = 1;
434
435 // FLA_Set( FLA_ZERO, Y );
436 // FLA_Set( FLA_ZERO, Z );
437 bl1_dsetm( m_A,
438 b_alg,
439 buff_0,
440 buff_Y, rs_Y, cs_Y );
441 bl1_dsetm( m_A,
442 b_alg,
443 buff_0,
444 buff_Z, rs_Z, cs_Z );
445
446 for ( i = 0; i < b_alg; ++i )
447 {
448 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
449 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
450 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
451 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
452 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
453 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
454 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
455
456 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
457 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
458 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
459
460 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
461 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
462 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
463
464 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
465 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
466
467 double* e0 = buff_e + (0 )*inc_e;
468
469 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
470
471 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
472 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
473
474 double* ABL = a10t;
475 double* ZBL = z10t;
476
477 double* a2 = alpha11;
478
479 int m_ahead = m_A - i - 1;
480 int n_ahead = m_A - i - 1;
481 int m_behind = i;
482 int n_behind = i;
483
484 /*------------------------------------------------------------*/
485
486 if ( m_behind > 0 )
487 {
488 // FLA_Copy( a10t_r, last_elem );
489 // FLA_Set( FLA_ONE, a10t_r );
490 last_elem = *a10t_r;
491 *a10t_r = *buff_1;
492 }
493
494 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
495 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
498 m_ahead + 1,
499 n_behind,
500 buff_m1,
501 ABL, rs_A, cs_A,
502 y10t, cs_Y,
503 buff_1,
504 a2, rs_A );
507 m_ahead + 1,
508 n_behind,
509 buff_m1,
510 ZBL, rs_Z, cs_Z,
511 a10t, cs_A,
512 buff_1,
513 a2, rs_A );
514
515 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
516 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
519 m_ahead,
520 n_behind,
521 buff_m1,
522 Y20, rs_Y, cs_Y,
523 a10t, cs_A,
524 buff_1,
525 a12t, cs_A );
528 m_ahead,
529 n_behind,
530 buff_m1,
531 A20, rs_A, cs_A,
532 z10t, cs_Z,
533 buff_1,
534 a12t, cs_A );
535
536 if ( m_behind > 0 )
537 {
538 // FLA_Copy( last_elem, a10t_r );
539 *a10t_r = last_elem;
540 }
541
542 if ( m_ahead > 0 )
543 {
544 // FLA_Househ2_UT( FLA_LEFT,
545 // a21_t,
546 // a21_b, tau11 );
548 a21_t,
549 a21_b, rs_A,
550 tau11 );
551
552 // FLA_Set( FLA_ONE, inv_tau11 );
553 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
554 // FLA_Copy( inv_tau11, minus_inv_tau11 );
555 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
558
559 // FLA_Copy( a21_t, first_elem );
560 // FLA_Set( FLA_ONE, a21_t );
561 first_elem = *a21_t;
562 *a21_t = *buff_1;
563
564 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
565 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
567 n_ahead,
568 A22, rs_A, cs_A,
569 a21, rs_A,
570 y21, rs_Y,
571 z21, rs_Z );
572
573 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
574 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
575 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
576 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
577 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
578 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
579 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
580 // FLA_Copy( d0, t01 );
582 n_behind,
583 buff_m1,
584 A20, rs_A, cs_A,
585 Y20, rs_Y, cs_Y,
586 Z20, rs_Z, cs_Z,
587 t01, rs_T,
588 a21, rs_A,
589 y21, rs_Y,
590 z21, rs_Z );
591
592 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
593 // FLA_Inv_scal( FLA_TWO, beta );
594 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
596 m_ahead,
597 a21, rs_A,
598 z21, rs_Z,
599 &beta );
602
603 // FLA_Scal( minus_inv_tau11, conj_beta );
604 // FLA_Axpy( conj_beta, a21, y21 );
605 // FLA_Scal( inv_tau11, y21 );
608 m_ahead,
609 &conj_beta,
610 a21, rs_A,
611 y21, rs_Y );
613 m_ahead,
614 &inv_tau11,
615 y21, rs_Y );
616
617 // FLA_Scal( minus_inv_tau11, beta );
618 // FLA_Axpy( beta, a21, z21 );
619 // FLA_Scal( inv_tau11, z21 );
622 m_ahead,
623 &beta,
624 a21, rs_A,
625 z21, rs_Z );
627 m_ahead,
628 &inv_tau11,
629 z21, rs_Z );
630
631 // FLA_Dot( a12t, a21, dot_product );
632 // FLA_Scal( minus_inv_tau11, dot_product );
633 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
635 m_ahead,
636 a12t, cs_A,
637 a21, rs_A,
638 &dot_product );
641 m_ahead,
643 a21, rs_A,
644 a12t, cs_A );
645
646 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
647 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
650 m_behind,
651 n_ahead,
652 buff_1,
653 A02, rs_A, cs_A,
654 a21, rs_A,
655 buff_0,
656 e0, inc_e );
659 m_behind,
660 n_ahead,
662 e0, inc_e,
663 a21, rs_A,
664 A02, rs_A, cs_A );
665
666 // FLA_Copy( first_elem, a21_t );
667 *a21_t = first_elem;
668 }
669
670 /*------------------------------------------------------------*/
671
672 }
673
674 // FLA_Obj_free( &e );
675 FLA_free( buff_e );
676
677 return FLA_SUCCESS;
678}
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:270
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofs_var4()

FLA_Error FLA_Hess_UT_step_ofs_var4 ( int  m_A,
int  m_T,
float *buff_A,
int  rs_A,
int  cs_A,
float *buff_Y,
int  rs_Y,
int  cs_Y,
float *buff_Z,
int  rs_Z,
int  cs_Z,
float *buff_T,
int  rs_T,
int  cs_T 
)
138{
139 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
143
144 float first_elem, last_elem;
145 float dot_product;
146 float beta, conj_beta;
147 float inv_tau11;
148 float minus_inv_tau11;
149 int i;
150
151 // b_alg = FLA_Obj_length( T );
152 int b_alg = m_T;
153
154 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157 float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158 int inc_e = 1;
159
160 // FLA_Set( FLA_ZERO, Y );
161 // FLA_Set( FLA_ZERO, Z );
162 bl1_ssetm( m_A,
163 b_alg,
164 buff_0,
165 buff_Y, rs_Y, cs_Y );
166 bl1_ssetm( m_A,
167 b_alg,
168 buff_0,
169 buff_Z, rs_Z, cs_Z );
170
171 for ( i = 0; i < b_alg; ++i )
172 {
173 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
174 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
175 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
176 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
177 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
178 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
179 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
180
181 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
182 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
183 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
184
185 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
186 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
187 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
188
189 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
190 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
191
192 float* e0 = buff_e + (0 )*inc_e;
193
194 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
195
196 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
197 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
198
199 float* ABL = a10t;
200 float* ZBL = z10t;
201
202 float* a2 = alpha11;
203
204 int m_ahead = m_A - i - 1;
205 int n_ahead = m_A - i - 1;
206 int m_behind = i;
207 int n_behind = i;
208
209 /*------------------------------------------------------------*/
210
211 if ( m_behind > 0 )
212 {
213 // FLA_Copy( a10t_r, last_elem );
214 // FLA_Set( FLA_ONE, a10t_r );
215 last_elem = *a10t_r;
216 *a10t_r = *buff_1;
217 }
218
219 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
220 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
223 m_ahead + 1,
224 n_behind,
225 buff_m1,
226 ABL, rs_A, cs_A,
227 y10t, cs_Y,
228 buff_1,
229 a2, rs_A );
232 m_ahead + 1,
233 n_behind,
234 buff_m1,
235 ZBL, rs_Z, cs_Z,
236 a10t, cs_A,
237 buff_1,
238 a2, rs_A );
239
240 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
241 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
244 m_ahead,
245 n_behind,
246 buff_m1,
247 Y20, rs_Y, cs_Y,
248 a10t, cs_A,
249 buff_1,
250 a12t, cs_A );
253 m_ahead,
254 n_behind,
255 buff_m1,
256 A20, rs_A, cs_A,
257 z10t, cs_Z,
258 buff_1,
259 a12t, cs_A );
260
261 if ( m_behind > 0 )
262 {
263 // FLA_Copy( last_elem, a10t_r );
264 *a10t_r = last_elem;
265 }
266
267 if ( m_ahead > 0 )
268 {
269 // FLA_Househ2_UT( FLA_LEFT,
270 // a21_t,
271 // a21_b, tau11 );
273 a21_t,
274 a21_b, rs_A,
275 tau11 );
276
277 // FLA_Set( FLA_ONE, inv_tau11 );
278 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
279 // FLA_Copy( inv_tau11, minus_inv_tau11 );
280 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
283
284 // FLA_Copy( a21_t, first_elem );
285 // FLA_Set( FLA_ONE, a21_t );
286 first_elem = *a21_t;
287 *a21_t = *buff_1;
288
289 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
290 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
292 n_ahead,
293 A22, rs_A, cs_A,
294 a21, rs_A,
295 y21, rs_Y,
296 z21, rs_Z );
297
298 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
299 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
300 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
301 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
302 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
303 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
304 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
305 // FLA_Copy( d0, t01 );
307 n_behind,
308 buff_m1,
309 A20, rs_A, cs_A,
310 Y20, rs_Y, cs_Y,
311 Z20, rs_Z, cs_Z,
312 t01, rs_T,
313 a21, rs_A,
314 y21, rs_Y,
315 z21, rs_Z );
316
317 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
318 // FLA_Inv_scal( FLA_TWO, beta );
319 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
321 m_ahead,
322 a21, rs_A,
323 z21, rs_Z,
324 &beta );
327
328 // FLA_Scal( minus_inv_tau11, conj_beta );
329 // FLA_Axpy( conj_beta, a21, y21 );
330 // FLA_Scal( inv_tau11, y21 );
333 m_ahead,
334 &conj_beta,
335 a21, rs_A,
336 y21, rs_Y );
338 m_ahead,
339 &inv_tau11,
340 y21, rs_Y );
341
342 // FLA_Scal( minus_inv_tau11, beta );
343 // FLA_Axpy( beta, a21, z21 );
344 // FLA_Scal( inv_tau11, z21 );
347 m_ahead,
348 &beta,
349 a21, rs_A,
350 z21, rs_Z );
352 m_ahead,
353 &inv_tau11,
354 z21, rs_Z );
355
356 // FLA_Dot( a12t, a21, dot_product );
357 // FLA_Scal( minus_inv_tau11, dot_product );
358 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
360 m_ahead,
361 a12t, cs_A,
362 a21, rs_A,
363 &dot_product );
366 m_ahead,
368 a21, rs_A,
369 a12t, cs_A );
370
371 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
372 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
375 m_behind,
376 n_ahead,
377 buff_1,
378 A02, rs_A, cs_A,
379 a21, rs_A,
380 buff_0,
381 e0, inc_e );
384 m_behind,
385 n_ahead,
387 e0, inc_e,
388 a21, rs_A,
389 A02, rs_A, cs_A );
390
391 // FLA_Copy( first_elem, a21_t );
392 *a21_t = first_elem;
393 }
394
395 /*------------------------------------------------------------*/
396
397 }
398
399 // FLA_Obj_free( &e );
400 FLA_free( buff_e );
401
402 return FLA_SUCCESS;
403}
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:116
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:156
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Uhu_Yhu_Zhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofu_var4()

FLA_Error FLA_Hess_UT_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Datatype datatype;
32 int m_A, m_T;
33 int rs_A, cs_A;
34 int rs_Y, cs_Y;
35 int rs_Z, cs_Z;
36 int rs_T, cs_T;
37
38 datatype = FLA_Obj_datatype( A );
39
40 m_A = FLA_Obj_length( A );
41 m_T = FLA_Obj_length( T );
42
45
48
51
54
55
56 switch ( datatype )
57 {
58 case FLA_FLOAT:
59 {
60 float* buff_A = FLA_FLOAT_PTR( A );
61 float* buff_Y = FLA_FLOAT_PTR( Y );
62 float* buff_Z = FLA_FLOAT_PTR( Z );
63 float* buff_T = FLA_FLOAT_PTR( T );
64
66 m_T,
70 buff_T, rs_T, cs_T );
71
72 break;
73 }
74
75 case FLA_DOUBLE:
76 {
77 double* buff_A = FLA_DOUBLE_PTR( A );
78 double* buff_Y = FLA_DOUBLE_PTR( Y );
79 double* buff_Z = FLA_DOUBLE_PTR( Z );
80 double* buff_T = FLA_DOUBLE_PTR( T );
81
83 m_T,
87 buff_T, rs_T, cs_T );
88
89 break;
90 }
91
92 case FLA_COMPLEX:
93 {
98
100 m_T,
101 buff_A, rs_A, cs_A,
102 buff_Y, rs_Y, cs_Y,
103 buff_Z, rs_Z, cs_Z,
104 buff_T, rs_T, cs_T );
105
106 break;
107 }
108
110 {
115
117 m_T,
118 buff_A, rs_A, cs_A,
119 buff_Y, rs_Y, cs_Y,
120 buff_Z, rs_Z, cs_Z,
121 buff_T, rs_T, cs_T );
122
123 break;
124 }
125 }
126
127 return FLA_SUCCESS;
128}
FLA_Error FLA_Hess_UT_step_ofz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:957
FLA_Error FLA_Hess_UT_step_ofs_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:132
FLA_Error FLA_Hess_UT_step_ofd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:407
FLA_Error FLA_Hess_UT_step_ofc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:682
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blf_var4(), and FLA_Hess_UT_ofu_var4().

◆ FLA_Hess_UT_step_ofz_var4()

FLA_Error FLA_Hess_UT_step_ofz_var4 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
963{
968
974 int i;
975
976 // b_alg = FLA_Obj_length( T );
977 int b_alg = m_T;
978
979 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
980 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
981 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
982 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
983 int inc_e = 1;
984
985 // FLA_Set( FLA_ZERO, Y );
986 // FLA_Set( FLA_ZERO, Z );
987 bl1_zsetm( m_A,
988 b_alg,
989 buff_0,
990 buff_Y, rs_Y, cs_Y );
991 bl1_zsetm( m_A,
992 b_alg,
993 buff_0,
994 buff_Z, rs_Z, cs_Z );
995
996 for ( i = 0; i < b_alg; ++i )
997 {
998 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
999 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1000 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1001 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1002 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1003 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1004 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1005
1006 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1007 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1008 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1009
1010 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1011 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1012 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1013
1014 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1015 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1016
1017 dcomplex* e0 = buff_e + (0 )*inc_e;
1018
1019 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
1020
1021 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1022 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1023
1024 dcomplex* ABL = a10t;
1025 dcomplex* ZBL = z10t;
1026
1027 dcomplex* a2 = alpha11;
1028
1029 int m_ahead = m_A - i - 1;
1030 int n_ahead = m_A - i - 1;
1031 int m_behind = i;
1032 int n_behind = i;
1033
1034 /*------------------------------------------------------------*/
1035
1036 if ( m_behind > 0 )
1037 {
1038 // FLA_Copy( a10t_r, last_elem );
1039 // FLA_Set( FLA_ONE, a10t_r );
1040 last_elem = *a10t_r;
1041 *a10t_r = *buff_1;
1042 }
1043
1044 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1045 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
1048 m_ahead + 1,
1049 n_behind,
1050 buff_m1,
1051 ABL, rs_A, cs_A,
1052 y10t, cs_Y,
1053 buff_1,
1054 a2, rs_A );
1057 m_ahead + 1,
1058 n_behind,
1059 buff_m1,
1060 ZBL, rs_Z, cs_Z,
1061 a10t, cs_A,
1062 buff_1,
1063 a2, rs_A );
1064
1065 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1066 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
1069 m_ahead,
1070 n_behind,
1071 buff_m1,
1072 Y20, rs_Y, cs_Y,
1073 a10t, cs_A,
1074 buff_1,
1075 a12t, cs_A );
1078 m_ahead,
1079 n_behind,
1080 buff_m1,
1081 A20, rs_A, cs_A,
1082 z10t, cs_Z,
1083 buff_1,
1084 a12t, cs_A );
1085
1086 if ( m_behind > 0 )
1087 {
1088 // FLA_Copy( last_elem, a10t_r );
1089 *a10t_r = last_elem;
1090 }
1091
1092 if ( m_ahead > 0 )
1093 {
1094 // FLA_Househ2_UT( FLA_LEFT,
1095 // a21_t,
1096 // a21_b, tau11 );
1098 a21_t,
1099 a21_b, rs_A,
1100 tau11 );
1101
1102 // FLA_Set( FLA_ONE, inv_tau11 );
1103 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1104 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1105 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1108
1109 // FLA_Copy( a21_t, first_elem );
1110 // FLA_Set( FLA_ONE, a21_t );
1111 first_elem = *a21_t;
1112 *a21_t = *buff_1;
1113
1114 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1115 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1117 n_ahead,
1118 A22, rs_A, cs_A,
1119 a21, rs_A,
1120 y21, rs_Y,
1121 z21, rs_Z );
1122
1123 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1124 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1125 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1126 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1127 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1128 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1129 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1130 // FLA_Copy( d0, t01 );
1132 n_behind,
1133 buff_m1,
1134 A20, rs_A, cs_A,
1135 Y20, rs_Y, cs_Y,
1136 Z20, rs_Z, cs_Z,
1137 t01, rs_T,
1138 a21, rs_A,
1139 y21, rs_Y,
1140 z21, rs_Z );
1141
1142 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1143 // FLA_Inv_scal( FLA_TWO, beta );
1144 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1146 m_ahead,
1147 a21, rs_A,
1148 z21, rs_Z,
1149 &beta );
1152
1153 // FLA_Scal( minus_inv_tau11, conj_beta );
1154 // FLA_Axpy( conj_beta, a21, y21 );
1155 // FLA_Scal( inv_tau11, y21 );
1158 m_ahead,
1159 &conj_beta,
1160 a21, rs_A,
1161 y21, rs_Y );
1163 m_ahead,
1164 &inv_tau11,
1165 y21, rs_Y );
1166
1167 // FLA_Scal( minus_inv_tau11, beta );
1168 // FLA_Axpy( beta, a21, z21 );
1169 // FLA_Scal( inv_tau11, z21 );
1172 m_ahead,
1173 &beta,
1174 a21, rs_A,
1175 z21, rs_Z );
1177 m_ahead,
1178 &inv_tau11,
1179 z21, rs_Z );
1180
1181 // FLA_Dot( a12t, a21, dot_product );
1182 // FLA_Scal( minus_inv_tau11, dot_product );
1183 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1185 m_ahead,
1186 a12t, cs_A,
1187 a21, rs_A,
1188 &dot_product );
1191 m_ahead,
1192 &dot_product,
1193 a21, rs_A,
1194 a12t, cs_A );
1195
1196 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1197 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1200 m_behind,
1201 n_ahead,
1202 buff_1,
1203 A02, rs_A, cs_A,
1204 a21, rs_A,
1205 buff_0,
1206 e0, inc_e );
1209 m_behind,
1210 n_ahead,
1212 e0, inc_e,
1213 a21, rs_A,
1214 A02, rs_A, cs_A );
1215
1216 // FLA_Copy( first_elem, a21_t );
1217 *a21_t = first_elem;
1218 }
1219
1220 /*------------------------------------------------------------*/
1221
1222 }
1223
1224 // FLA_Obj_free( &e );
1225 FLA_free( buff_e );
1226
1227 return FLA_SUCCESS;
1228}
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:500
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Uhu_Yhu_Zhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().