libflame revision_anchor
Functions
FLA_Hess_UT_fus_var3.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_ofu_var3()

FLA_Error FLA_Hess_UT_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_ofu_var3( A, T );
16}
FLA_Error FLA_Hess_UT_step_ofu_var3(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_fus_var3.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Hess_UT_step_ofu_var3(), and i.

◆ FLA_Hess_UT_step_ofc_var3()

FLA_Error FLA_Hess_UT_step_ofc_var3 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
753{
758
767 int i;
768
769 // b_alg = FLA_Obj_length( T );
770 int b_alg = m_T;
771
772 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
773 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
774 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
775 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
776 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
777 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
778 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
779 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
780 scomplex* buff_v = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
781 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
782 int inc_u = 1;
783 int inc_y = 1;
784 int inc_z = 1;
785 int inc_v = 1;
786 int inc_w = 1;
787
788 for ( i = 0; i < b_alg; ++i )
789 {
790 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
791 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
792 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
793 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
794 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
795 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
796
797 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
798 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
799
801 scomplex* u2 = buff_u + (i+1)*inc_u;
802
803 scomplex* y0 = buff_y + (0 )*inc_y;
804 scomplex* psi1 = buff_y + (i )*inc_y;
805 scomplex* y2 = buff_y + (i+1)*inc_y;
806
807 scomplex* zeta1 = buff_z + (i )*inc_z;
808 scomplex* z2 = buff_z + (i+1)*inc_z;
809
810 scomplex* v2 = buff_v + (i+1)*inc_v;
811
812 scomplex* w2 = buff_w + (i+1)*inc_w;
813
814 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
815 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
816
817 int m_ahead = m_A - i - 1;
818 int n_ahead = m_A - i - 1;
819 int m_behind = i;
820 int n_behind = i;
821
822 /*------------------------------------------------------------*/
823
824 if ( m_behind > 0 )
825 {
826 // FLA_Copy( upsilon1, minus_upsilon1 );
827 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
828 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
831
832 // FLA_Copy( psi1, minus_psi1 );
833 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
834 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
837
838 // FLA_Copy( zeta1, minus_zeta1 );
839 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
841
842 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
843 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
845 1,
847 psi1, 1,
848 alpha11, 1 );
850 1,
852 upsilon1, 1,
853 alpha11, 1 );
854
855 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
856 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
858 m_ahead,
860 y2, inc_y,
861 a12t, cs_A );
863 m_ahead,
865 u2, inc_u,
866 a12t, cs_A );
867
868 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
869 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
871 m_ahead,
873 u2, inc_u,
874 a21, rs_A );
876 m_ahead,
878 z2, inc_z,
879 a21, rs_A );
880 }
881
882 if ( m_ahead > 0 )
883 {
884 // FLA_Househ2_UT( FLA_LEFT,
885 // a21_t,
886 // a21_b, tau11 );
888 a21_t,
889 a21_b, rs_A,
890 tau11 );
891
892 // FLA_Set( FLA_ONE, inv_tau11 );
893 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
894 // FLA_Copy( inv_tau11, minus_inv_tau11 );
895 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
898
899 // FLA_Copy( a21_t, first_elem );
900 // FLA_Set( FLA_ONE, a21_t );
901 first_elem = *a21_t;
902 *a21_t = *buff_1;
903 }
904
905 if ( m_behind > 0 && m_ahead > 0 )
906 {
907 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
908 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
909 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
910 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
912 n_ahead,
913 buff_m1,
914 u2, inc_u,
915 y2, inc_y,
916 z2, inc_z,
917 A22, rs_A, cs_A,
918 a21, rs_A,
919 v2, inc_v,
920 w2, inc_w );
921 }
922 else if ( m_ahead > 0 )
923 {
924 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
925 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
927 n_ahead,
928 A22, rs_A, cs_A,
929 a21, rs_A,
930 v2, inc_v,
931 w2, inc_w );
932 }
933
934 if ( m_ahead > 0 )
935 {
936 // FLA_Copy( a21, u2 );
937 // FLA_Copy( v2, y2 );
938 // FLA_Copy( w2, z2 );
940 m_ahead,
941 a21, rs_A,
942 u2, inc_u );
944 m_ahead,
945 v2, inc_v,
946 y2, inc_y );
948 m_ahead,
949 w2, inc_w,
950 z2, inc_z );
951
952 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
953 // FLA_Inv_scal( FLA_TWO, beta );
954 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
956 m_ahead,
957 a21, rs_A,
958 z2, inc_z,
959 &beta );
962
963 // FLA_Scal( minus_inv_tau11, conj_beta );
964 // FLA_Axpy( conj_beta, a21, y2 );
965 // FLA_Scal( inv_tau11, y2 );
968 m_ahead,
969 &conj_beta,
970 a21, rs_A,
971 y2, inc_y );
973 m_ahead,
974 &inv_tau11,
975 y2, inc_y );
976
977 // FLA_Scal( minus_inv_tau11, beta );
978 // FLA_Axpy( beta, a21, z2 );
979 // FLA_Scal( inv_tau11, z2 );
982 m_ahead,
983 &beta,
984 a21, rs_A,
985 z2, inc_z );
987 m_ahead,
988 &inv_tau11,
989 z2, inc_z );
990
991 // FLA_Dot( a12t, a21, dot_product );
992 // FLA_Scal( minus_inv_tau11, dot_product );
993 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
995 m_ahead,
996 a12t, cs_A,
997 a21, rs_A,
998 &dot_product );
1001 m_ahead,
1002 &dot_product,
1003 a21, rs_A,
1004 a12t, cs_A );
1005
1006 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1007 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1010 m_behind,
1011 n_ahead,
1012 buff_1,
1013 A02, rs_A, cs_A,
1014 a21, rs_A,
1015 buff_0,
1016 y0, inc_y );
1019 m_behind,
1020 n_ahead,
1022 y0, inc_y,
1023 a21, rs_A,
1024 A02, rs_A, cs_A );
1025
1026 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1029 m_ahead,
1030 n_behind,
1031 buff_1,
1032 A20, rs_A, cs_A,
1033 a21, rs_A,
1034 buff_0,
1035 t01, rs_T );
1036
1037 // FLA_Copy( first_elem, a21_t );
1038 *a21_t = first_elem;
1039 }
1040
1041 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1042 {
1043 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1044 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1046 n_ahead,
1047 buff_m1,
1048 u2, inc_u,
1049 y2, inc_y,
1050 z2, inc_z,
1051 u2, inc_u,
1052 A22, rs_A, cs_A );
1053 }
1054
1055 /*------------------------------------------------------------*/
1056
1057 }
1058
1059 // FLA_Obj_free( &u );
1060 // FLA_Obj_free( &y );
1061 // FLA_Obj_free( &z );
1062 // FLA_Obj_free( &v );
1063 // FLA_Obj_free( &w );
1064 FLA_free( buff_u );
1065 FLA_free( buff_y );
1066 FLA_free( buff_z );
1067 FLA_free( buff_v );
1068 FLA_free( buff_w );
1069
1070 return FLA_SUCCESS;
1071}
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:241
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:327
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
double *restrict zeta1
Definition bl1_axmyv2.c:142
double *restrict psi1
Definition bl1_axmyv2.c:141
upsilon1
Definition bl1_axpyv2bdotaxpy.c:225
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofd_var3()

FLA_Error FLA_Hess_UT_step_ofd_var3 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
427{
428 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
429 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
430 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
432
433 double first_elem;
434 double dot_product;
435 double beta, conj_beta;
436 double inv_tau11;
437 double minus_inv_tau11;
440 double minus_zeta1;
441 int i;
442
443 // b_alg = FLA_Obj_length( T );
444 int b_alg = m_T;
445
446 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
447 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
448 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
449 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
450 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
451 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
452 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
453 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
454 double* buff_v = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
455 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
456 int inc_u = 1;
457 int inc_y = 1;
458 int inc_z = 1;
459 int inc_v = 1;
460 int inc_w = 1;
461
462 for ( i = 0; i < b_alg; ++i )
463 {
464 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
465 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
466 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
467 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
468 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
469 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
470
471 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
472 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
473
474 double* upsilon1 = buff_u + (i )*inc_u;
475 double* u2 = buff_u + (i+1)*inc_u;
476
477 double* y0 = buff_y + (0 )*inc_y;
478 double* psi1 = buff_y + (i )*inc_y;
479 double* y2 = buff_y + (i+1)*inc_y;
480
481 double* zeta1 = buff_z + (i )*inc_z;
482 double* z2 = buff_z + (i+1)*inc_z;
483
484 double* v2 = buff_v + (i+1)*inc_v;
485
486 double* w2 = buff_w + (i+1)*inc_w;
487
488 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
489 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
490
491 int m_ahead = m_A - i - 1;
492 int n_ahead = m_A - i - 1;
493 int m_behind = i;
494 int n_behind = i;
495
496 /*------------------------------------------------------------*/
497
498 if ( m_behind > 0 )
499 {
500 // FLA_Copy( upsilon1, minus_upsilon1 );
501 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
502 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
505
506 // FLA_Copy( psi1, minus_psi1 );
507 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
508 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
511
512 // FLA_Copy( zeta1, minus_zeta1 );
513 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
515
516 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
517 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
519 1,
521 psi1, 1,
522 alpha11, 1 );
524 1,
526 upsilon1, 1,
527 alpha11, 1 );
528
529 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
530 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
532 m_ahead,
534 y2, inc_y,
535 a12t, cs_A );
537 m_ahead,
539 u2, inc_u,
540 a12t, cs_A );
541
542 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
543 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
545 m_ahead,
547 u2, inc_u,
548 a21, rs_A );
550 m_ahead,
552 z2, inc_z,
553 a21, rs_A );
554 }
555
556 if ( m_ahead > 0 )
557 {
558 // FLA_Househ2_UT( FLA_LEFT,
559 // a21_t,
560 // a21_b, tau11 );
562 a21_t,
563 a21_b, rs_A,
564 tau11 );
565
566 // FLA_Set( FLA_ONE, inv_tau11 );
567 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
568 // FLA_Copy( inv_tau11, minus_inv_tau11 );
569 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
572
573 // FLA_Copy( a21_t, first_elem );
574 // FLA_Set( FLA_ONE, a21_t );
575 first_elem = *a21_t;
576 *a21_t = *buff_1;
577 }
578
579 if ( m_behind > 0 && m_ahead > 0 )
580 {
581 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
582 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
583 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
584 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
586 n_ahead,
587 buff_m1,
588 u2, inc_u,
589 y2, inc_y,
590 z2, inc_z,
591 A22, rs_A, cs_A,
592 a21, rs_A,
593 v2, inc_v,
594 w2, inc_w );
595 }
596 else if ( m_ahead > 0 )
597 {
598 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
599 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
601 n_ahead,
602 A22, rs_A, cs_A,
603 a21, rs_A,
604 v2, inc_v,
605 w2, inc_w );
606 }
607
608 if ( m_ahead > 0 )
609 {
610 // FLA_Copy( a21, u2 );
611 // FLA_Copy( v2, y2 );
612 // FLA_Copy( w2, z2 );
614 m_ahead,
615 a21, rs_A,
616 u2, inc_u );
618 m_ahead,
619 v2, inc_v,
620 y2, inc_y );
622 m_ahead,
623 w2, inc_w,
624 z2, inc_z );
625
626 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
627 // FLA_Inv_scal( FLA_TWO, beta );
628 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
630 m_ahead,
631 a21, rs_A,
632 z2, inc_z,
633 &beta );
636
637 // FLA_Scal( minus_inv_tau11, conj_beta );
638 // FLA_Axpy( conj_beta, a21, y2 );
639 // FLA_Scal( inv_tau11, y2 );
642 m_ahead,
643 &conj_beta,
644 a21, rs_A,
645 y2, inc_y );
647 m_ahead,
648 &inv_tau11,
649 y2, inc_y );
650
651 // FLA_Scal( minus_inv_tau11, beta );
652 // FLA_Axpy( beta, a21, z2 );
653 // FLA_Scal( inv_tau11, z2 );
656 m_ahead,
657 &beta,
658 a21, rs_A,
659 z2, inc_z );
661 m_ahead,
662 &inv_tau11,
663 z2, inc_z );
664
665 // FLA_Dot( a12t, a21, dot_product );
666 // FLA_Scal( minus_inv_tau11, dot_product );
667 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
669 m_ahead,
670 a12t, cs_A,
671 a21, rs_A,
672 &dot_product );
675 m_ahead,
677 a21, rs_A,
678 a12t, cs_A );
679
680 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
681 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
684 m_behind,
685 n_ahead,
686 buff_1,
687 A02, rs_A, cs_A,
688 a21, rs_A,
689 buff_0,
690 y0, inc_y );
693 m_behind,
694 n_ahead,
696 y0, inc_y,
697 a21, rs_A,
698 A02, rs_A, cs_A );
699
700 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
703 m_ahead,
704 n_behind,
705 buff_1,
706 A20, rs_A, cs_A,
707 a21, rs_A,
708 buff_0,
709 t01, rs_T );
710
711 // FLA_Copy( first_elem, a21_t );
712 *a21_t = first_elem;
713 }
714
715 if ( m_behind + 1 == b_alg && m_ahead > 0 )
716 {
717 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
718 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
720 n_ahead,
721 buff_m1,
722 u2, inc_u,
723 y2, inc_y,
724 z2, inc_z,
725 u2, inc_u,
726 A22, rs_A, cs_A );
727 }
728
729 /*------------------------------------------------------------*/
730
731 }
732
733 // FLA_Obj_free( &u );
734 // FLA_Obj_free( &y );
735 // FLA_Obj_free( &z );
736 // FLA_Obj_free( &v );
737 // FLA_Obj_free( &w );
738 FLA_free( buff_u );
739 FLA_free( buff_y );
740 FLA_free( buff_z );
741 FLA_free( buff_v );
742 FLA_free( buff_w );
743
744 return FLA_SUCCESS;
745}
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:248
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofs_var3()

FLA_Error FLA_Hess_UT_step_ofs_var3 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
114 float minus_zeta1;
115 int i;
116
117 // b_alg = FLA_Obj_length( T );
118 int b_alg = m_T;
119
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
121 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
122 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
123 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
124 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
125 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
126 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
127 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
128 float* buff_v = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
129 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
130 int inc_u = 1;
131 int inc_y = 1;
132 int inc_z = 1;
133 int inc_v = 1;
134 int inc_w = 1;
135
136 for ( i = 0; i < b_alg; ++i )
137 {
138 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
139 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
140 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
141 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
142 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
143 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
144
145 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
146 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
147
148 float* upsilon1 = buff_u + (i )*inc_u;
149 float* u2 = buff_u + (i+1)*inc_u;
150
151 float* y0 = buff_y + (0 )*inc_y;
152 float* psi1 = buff_y + (i )*inc_y;
153 float* y2 = buff_y + (i+1)*inc_y;
154
155 float* zeta1 = buff_z + (i )*inc_z;
156 float* z2 = buff_z + (i+1)*inc_z;
157
158 float* v2 = buff_v + (i+1)*inc_v;
159
160 float* w2 = buff_w + (i+1)*inc_w;
161
162 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
163 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
164
165 int m_ahead = m_A - i - 1;
166 int n_ahead = m_A - i - 1;
167 int m_behind = i;
168 int n_behind = i;
169
170 /*------------------------------------------------------------*/
171
172 if ( m_behind > 0 )
173 {
174 // FLA_Copy( upsilon1, minus_upsilon1 );
175 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
176 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
179
180 // FLA_Copy( psi1, minus_psi1 );
181 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
182 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
185
186 // FLA_Copy( zeta1, minus_zeta1 );
187 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
189
190 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
191 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
193 1,
195 psi1, 1,
196 alpha11, 1 );
198 1,
200 upsilon1, 1,
201 alpha11, 1 );
202
203 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
204 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
206 m_ahead,
208 y2, inc_y,
209 a12t, cs_A );
211 m_ahead,
213 u2, inc_u,
214 a12t, cs_A );
215
216 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
217 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
219 m_ahead,
221 u2, inc_u,
222 a21, rs_A );
224 m_ahead,
226 z2, inc_z,
227 a21, rs_A );
228 }
229
230 if ( m_ahead > 0 )
231 {
232 // FLA_Househ2_UT( FLA_LEFT,
233 // a21_t,
234 // a21_b, tau11 );
236 a21_t,
237 a21_b, rs_A,
238 tau11 );
239
240 // FLA_Set( FLA_ONE, inv_tau11 );
241 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
242 // FLA_Copy( inv_tau11, minus_inv_tau11 );
243 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
246
247 // FLA_Copy( a21_t, first_elem );
248 // FLA_Set( FLA_ONE, a21_t );
249 first_elem = *a21_t;
250 *a21_t = *buff_1;
251 }
252
253 if ( m_behind > 0 && m_ahead > 0 )
254 {
255 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
256 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
257 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
258 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
260 n_ahead,
261 buff_m1,
262 u2, inc_u,
263 y2, inc_y,
264 z2, inc_z,
265 A22, rs_A, cs_A,
266 a21, rs_A,
267 v2, inc_v,
268 w2, inc_w );
269 }
270 else if ( m_ahead > 0 )
271 {
272 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
273 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
275 n_ahead,
276 A22, rs_A, cs_A,
277 a21, rs_A,
278 v2, inc_v,
279 w2, inc_w );
280 }
281
282 if ( m_ahead > 0 )
283 {
284 // FLA_Copy( a21, u2 );
285 // FLA_Copy( v2, y2 );
286 // FLA_Copy( w2, z2 );
288 m_ahead,
289 a21, rs_A,
290 u2, inc_u );
292 m_ahead,
293 v2, inc_v,
294 y2, inc_y );
296 m_ahead,
297 w2, inc_w,
298 z2, inc_z );
299
300 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
301 // FLA_Inv_scal( FLA_TWO, beta );
302 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
304 m_ahead,
305 a21, rs_A,
306 z2, inc_z,
307 &beta );
310
311 // FLA_Scal( minus_inv_tau11, conj_beta );
312 // FLA_Axpy( conj_beta, a21, y2 );
313 // FLA_Scal( inv_tau11, y2 );
316 m_ahead,
317 &conj_beta,
318 a21, rs_A,
319 y2, inc_y );
321 m_ahead,
322 &inv_tau11,
323 y2, inc_y );
324
325 // FLA_Scal( minus_inv_tau11, beta );
326 // FLA_Axpy( beta, a21, z2 );
327 // FLA_Scal( inv_tau11, z2 );
330 m_ahead,
331 &beta,
332 a21, rs_A,
333 z2, inc_z );
335 m_ahead,
336 &inv_tau11,
337 z2, inc_z );
338
339 // FLA_Dot( a12t, a21, dot_product );
340 // FLA_Scal( minus_inv_tau11, dot_product );
341 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
343 m_ahead,
344 a12t, cs_A,
345 a21, rs_A,
346 &dot_product );
349 m_ahead,
351 a21, rs_A,
352 a12t, cs_A );
353
354 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
355 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
358 m_behind,
359 n_ahead,
360 buff_1,
361 A02, rs_A, cs_A,
362 a21, rs_A,
363 buff_0,
364 y0, inc_y );
367 m_behind,
368 n_ahead,
370 y0, inc_y,
371 a21, rs_A,
372 A02, rs_A, cs_A );
373
374 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
377 m_ahead,
378 n_behind,
379 buff_1,
380 A20, rs_A, cs_A,
381 a21, rs_A,
382 buff_0,
383 t01, rs_T );
384
385 // FLA_Copy( first_elem, a21_t );
386 *a21_t = first_elem;
387 }
388
389 if ( m_behind + 1 == b_alg && m_ahead > 0 )
390 {
391 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
392 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
394 n_ahead,
395 buff_m1,
396 u2, inc_u,
397 y2, inc_y,
398 z2, inc_z,
399 u2, inc_u,
400 A22, rs_A, cs_A );
401 }
402
403 /*------------------------------------------------------------*/
404
405 }
406
407 // FLA_Obj_free( &u );
408 // FLA_Obj_free( &y );
409 // FLA_Obj_free( &z );
410 // FLA_Obj_free( &v );
411 // FLA_Obj_free( &w );
412 FLA_free( buff_u );
413 FLA_free( buff_y );
414 FLA_free( buff_z );
415 FLA_free( buff_v );
416 FLA_free( buff_w );
417
418 return FLA_SUCCESS;
419}
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:116
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:150
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofu_var3()

FLA_Error FLA_Hess_UT_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
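30 rs_A = FLA_Obj_row_stride( A );
31 cs_A = FLA_Obj_col_stride( A );
32
33 rs_T = FLA_Obj_row_stride( T );
34 cs_T = FLA_Obj_col_stride( T );
35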
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
44 FLA_Hess_UT_step_ofs_var3( m_A,
45 m_T,
46 buff_A, rs_A, cs_A,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
57 FLA_Hess_UT_step_ofd_var3( m_A,
58 m_T,
59 buff_A, rs_A, cs_A,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_ofs_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:97
FLA_Error FLA_Hess_UT_step_ofz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:1075
FLA_Error FLA_Hess_UT_step_ofd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:423
FLA_Error FLA_Hess_UT_step_ofc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:749
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blf_var3(), and FLA_Hess_UT_ofu_var3().

◆ FLA_Hess_UT_step_ofz_var3()

FLA_Error FLA_Hess_UT_step_ofz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
1079{
1084
1093 int i;
1094
1095 // b_alg = FLA_Obj_length( T );
1096 int b_alg = m_T;
1097
1098 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1099 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
1100 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1101 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
1102 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1103 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1104 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1105 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1106 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1107 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1108 int inc_u = 1;
1109 int inc_y = 1;
1110 int inc_z = 1;
1111 int inc_v = 1;
1112 int inc_w = 1;
1113
1114 for ( i = 0; i < b_alg; ++i )
1115 {
1116 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1117 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1118 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1119 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1120 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1121 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1122
1123 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1124 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1125
1126 dcomplex* upsilon1 = buff_u + (i )*inc_u;
1127 dcomplex* u2 = buff_u + (i+1)*inc_u;
1128
1129 dcomplex* y0 = buff_y + (0 )*inc_y;
1130 dcomplex* psi1 = buff_y + (i )*inc_y;
1131 dcomplex* y2 = buff_y + (i+1)*inc_y;
1132
1133 dcomplex* zeta1 = buff_z + (i )*inc_z;
1134 dcomplex* z2 = buff_z + (i+1)*inc_z;
1135
1136 dcomplex* v2 = buff_v + (i+1)*inc_v;
1137
1138 dcomplex* w2 = buff_w + (i+1)*inc_w;
1139
1140 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1141 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1142
1143 int m_ahead = m_A - i - 1;
1144 int n_ahead = m_A - i - 1;
1145 int m_behind = i;
1146 int n_behind = i;
1147
1148 /*------------------------------------------------------------*/
1149
1150 if ( m_behind > 0 )
1151 {
1152 // FLA_Copy( upsilon1, minus_upsilon1 );
1153 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
1154 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
1157
1158 // FLA_Copy( psi1, minus_psi1 );
1159 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
1160 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
1163
1164 // FLA_Copy( zeta1, minus_zeta1 );
1165 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
1167
1168 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
1169 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
1171 1,
1173 psi1, 1,
1174 alpha11, 1 );
1176 1,
1177 &minus_zeta1,
1178 upsilon1, 1,
1179 alpha11, 1 );
1180
1181 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
1182 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
1184 m_ahead,
1186 y2, inc_y,
1187 a12t, cs_A );
1189 m_ahead,
1190 &minus_zeta1,
1191 u2, inc_u,
1192 a12t, cs_A );
1193
1194 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
1195 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
1197 m_ahead,
1199 u2, inc_u,
1200 a21, rs_A );
1202 m_ahead,
1204 z2, inc_z,
1205 a21, rs_A );
1206 }
1207
1208 if ( m_ahead > 0 )
1209 {
1210 // FLA_Househ2_UT( FLA_LEFT,
1211 // a21_t,
1212 // a21_b, tau11 );
1214 a21_t,
1215 a21_b, rs_A,
1216 tau11 );
1217
1218 // FLA_Set( FLA_ONE, inv_tau11 );
1219 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1220 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1221 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1224
1225 // FLA_Copy( a21_t, first_elem );
1226 // FLA_Set( FLA_ONE, a21_t );
1227 first_elem = *a21_t;
1228 *a21_t = *buff_1;
1229 }
1230
1231 if ( m_behind > 0 && m_ahead > 0 )
1232 {
1233 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1234 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1235 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1236 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1238 n_ahead,
1239 buff_m1,
1240 u2, inc_u,
1241 y2, inc_y,
1242 z2, inc_z,
1243 A22, rs_A, cs_A,
1244 a21, rs_A,
1245 v2, inc_v,
1246 w2, inc_w );
1247 }
1248 else if ( m_ahead > 0 )
1249 {
1250 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1251 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1253 n_ahead,
1254 A22, rs_A, cs_A,
1255 a21, rs_A,
1256 v2, inc_v,
1257 w2, inc_w );
1258 }
1259
1260 if ( m_ahead > 0 )
1261 {
1262 // FLA_Copy( a21, u2 );
1263 // FLA_Copy( v2, y2 );
1264 // FLA_Copy( w2, z2 );
1266 m_ahead,
1267 a21, rs_A,
1268 u2, inc_u );
1270 m_ahead,
1271 v2, inc_v,
1272 y2, inc_y );
1274 m_ahead,
1275 w2, inc_w,
1276 z2, inc_z );
1277
1278 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1279 // FLA_Inv_scal( FLA_TWO, beta );
1280 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1282 m_ahead,
1283 a21, rs_A,
1284 z2, inc_z,
1285 &beta );
1288
1289 // FLA_Scal( minus_inv_tau11, conj_beta );
1290 // FLA_Axpy( conj_beta, a21, y2 );
1291 // FLA_Scal( inv_tau11, y2 );
1294 m_ahead,
1295 &conj_beta,
1296 a21, rs_A,
1297 y2, inc_y );
1299 m_ahead,
1300 &inv_tau11,
1301 y2, inc_y );
1302
1303 // FLA_Scal( minus_inv_tau11, beta );
1304 // FLA_Axpy( beta, a21, z2 );
1305 // FLA_Scal( inv_tau11, z2 );
1308 m_ahead,
1309 &beta,
1310 a21, rs_A,
1311 z2, inc_z );
1313 m_ahead,
1314 &inv_tau11,
1315 z2, inc_z );
1316
1317 // FLA_Dot( a12t, a21, dot_product );
1318 // FLA_Scal( minus_inv_tau11, dot_product );
1319 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1321 m_ahead,
1322 a12t, cs_A,
1323 a21, rs_A,
1324 &dot_product );
1327 m_ahead,
1328 &dot_product,
1329 a21, rs_A,
1330 a12t, cs_A );
1331
1332 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1333 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1336 m_behind,
1337 n_ahead,
1338 buff_1,
1339 A02, rs_A, cs_A,
1340 a21, rs_A,
1341 buff_0,
1342 y0, inc_y );
1345 m_behind,
1346 n_ahead,
1348 y0, inc_y,
1349 a21, rs_A,
1350 A02, rs_A, cs_A );
1351
1352 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1355 m_ahead,
1356 n_behind,
1357 buff_1,
1358 A20, rs_A, cs_A,
1359 a21, rs_A,
1360 buff_0,
1361 t01, rs_T );
1362
1363 // FLA_Copy( first_elem, a21_t );
1364 *a21_t = first_elem;
1365 }
1366
1367 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1368 {
1369 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1370 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1372 n_ahead,
1373 buff_m1,
1374 u2, inc_u,
1375 y2, inc_y,
1376 z2, inc_z,
1377 u2, inc_u,
1378 A22, rs_A, cs_A );
1379 }
1380
1381 /*------------------------------------------------------------*/
1382
1383 }
1384
1385 // FLA_Obj_free( &u );
1386 // FLA_Obj_free( &y );
1387 // FLA_Obj_free( &z );
1388 // FLA_Obj_free( &v );
1389 // FLA_Obj_free( &w );
1390 FLA_free( buff_u );
1391 FLA_free( buff_y );
1392 FLA_free( buff_z );
1393 FLA_free( buff_v );
1394 FLA_free( buff_w );
1395
1396 return FLA_SUCCESS;
1397}
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:421
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().