libflame revision_anchor
Functions
FLA_Hess_UT_opt_var4.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_opt_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_opt_var4()

FLA_Error FLA_Hess_UT_opt_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Y, Z;
17
20
22
23 FLA_Obj_free( &Y );
24 FLA_Obj_free( &Z );
25
26 return r_val;
27}
FLA_Error FLA_Hess_UT_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition FLA_Hess_UT_opt_var4.c:29
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition FLA_Obj.c:286
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
int FLA_Error
Definition FLA_type_defs.h:47
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Hess_UT_step_opt_var4(), FLA_Obj_create_conf_to(), FLA_Obj_free(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_step_opc_var4()

FLA_Error FLA_Hess_UT_step_opc_var4 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
858{
863
869 int i;
870
871 // b_alg = FLA_Obj_length( T );
872 int b_alg = m_T;
873
874 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
875 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
876 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
877 scomplex* buff_d = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
878 scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
879 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
880 int inc_d = 1;
881 int inc_e = 1;
882 int inc_f = 1;
883
884 // FLA_Set( FLA_ZERO, Y );
885 // FLA_Set( FLA_ZERO, Z );
886 bl1_csetm( m_A,
887 b_alg,
888 buff_0,
889 buff_Y, rs_Y, cs_Y );
890 bl1_csetm( m_A,
891 b_alg,
892 buff_0,
893 buff_Z, rs_Z, cs_Z );
894
895 for ( i = 0; i < b_alg; ++i )
896 {
897 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
898 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
899 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
900 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
901 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
902 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
903 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
904
905 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
906 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
907 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
908
909 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
910 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
911 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
912
913 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
914 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
915
916 scomplex* d0 = buff_d + (0 )*inc_d;
917
918 scomplex* e0 = buff_e + (0 )*inc_e;
919
920 scomplex* f0 = buff_f + (0 )*inc_f;
921
922 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
923
924 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
925 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
926
927 scomplex* ABL = a10t;
928 scomplex* ZBL = z10t;
929
931
932 int m_ahead = m_A - i - 1;
933 int n_ahead = m_A - i - 1;
934 int m_behind = i;
935 int n_behind = i;
936
937 /*------------------------------------------------------------*/
938
939 if ( m_behind > 0 )
940 {
941 // FLA_Copy( a10t_r, last_elem );
942 // FLA_Set( FLA_ONE, a10t_r );
943 last_elem = *a10t_r;
944 *a10t_r = *buff_1;
945 }
946
947 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
948 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
951 m_ahead + 1,
952 n_behind,
953 buff_m1,
954 ABL, rs_A, cs_A,
955 y10t, cs_Y,
956 buff_1,
957 a2, rs_A );
960 m_ahead + 1,
961 n_behind,
962 buff_m1,
963 ZBL, rs_Z, cs_Z,
964 a10t, cs_A,
965 buff_1,
966 a2, rs_A );
967
968 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
969 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
972 m_ahead,
973 n_behind,
974 buff_m1,
975 Y20, rs_Y, cs_Y,
976 a10t, cs_A,
977 buff_1,
978 a12t, cs_A );
981 m_ahead,
982 n_behind,
983 buff_m1,
984 A20, rs_A, cs_A,
985 z10t, cs_Z,
986 buff_1,
987 a12t, cs_A );
988
989 if ( m_behind > 0 )
990 {
991 // FLA_Copy( last_elem, a10t_r );
992 *a10t_r = last_elem;
993 }
994
995 if ( m_ahead > 0 )
996 {
997 // FLA_Househ2_UT( FLA_LEFT,
998 // a21_t,
999 // a21_b, tau11 );
1001 a21_t,
1002 a21_b, rs_A,
1003 tau11 );
1004
1005 // FLA_Set( FLA_ONE, inv_tau11 );
1006 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1007 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1008 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1011
1012 // FLA_Copy( a21_t, first_elem );
1013 // FLA_Set( FLA_ONE, a21_t );
1014 first_elem = *a21_t;
1015 *a21_t = *buff_1;
1016
1017 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1020 m_ahead,
1021 n_ahead,
1022 buff_1,
1023 A22, rs_A, cs_A,
1024 a21, rs_A,
1025 buff_0,
1026 y21, rs_Y );
1027
1028 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1031 m_ahead,
1032 n_ahead,
1033 buff_1,
1034 A22, rs_A, cs_A,
1035 a21, rs_A,
1036 buff_0,
1037 z21, rs_Z );
1038
1039 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1040 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1041 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1044 m_ahead,
1045 n_behind,
1046 buff_1,
1047 A20, rs_A, cs_A,
1048 a21, rs_A,
1049 buff_0,
1050 d0, inc_d );
1053 m_ahead,
1054 n_behind,
1055 buff_1,
1056 Y20, rs_Y, cs_Y,
1057 a21, rs_A,
1058 buff_0,
1059 e0, inc_e );
1062 m_ahead,
1063 n_behind,
1064 buff_1,
1065 Z20, rs_Z, cs_Z,
1066 a21, rs_A,
1067 buff_0,
1068 f0, inc_f );
1069
1070 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1071 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1074 m_ahead,
1075 n_behind,
1076 buff_m1,
1077 Y20, rs_Y, cs_Y,
1078 d0, inc_d,
1079 buff_1,
1080 y21, rs_Y );
1083 m_ahead,
1084 n_behind,
1085 buff_m1,
1086 A20, rs_A, cs_A,
1087 f0, inc_f,
1088 buff_1,
1089 y21, rs_Y );
1090
1091 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1092 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1095 m_ahead,
1096 n_behind,
1097 buff_m1,
1098 A20, rs_A, cs_A,
1099 e0, inc_e,
1100 buff_1,
1101 z21, rs_Z );
1104 m_ahead,
1105 n_behind,
1106 buff_m1,
1107 Z20, rs_Z, cs_Z,
1108 d0, inc_d,
1109 buff_1,
1110 z21, rs_Z );
1111
1112 // FLA_Copy( d0, t01 );
1114 n_behind,
1115 d0, inc_d,
1116 t01, rs_T );
1117
1118 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1119 // FLA_Inv_scal( FLA_TWO, beta );
1120 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1122 m_ahead,
1123 a21, rs_A,
1124 z21, rs_Z,
1125 &beta );
1128
1129 // FLA_Scal( minus_inv_tau11, conj_beta );
1130 // FLA_Axpy( conj_beta, a21, y21 );
1131 // FLA_Scal( inv_tau11, y21 );
1134 m_ahead,
1135 &conj_beta,
1136 a21, rs_A,
1137 y21, rs_Y );
1139 m_ahead,
1140 &inv_tau11,
1141 y21, rs_Y );
1142
1143 // FLA_Scal( minus_inv_tau11, beta );
1144 // FLA_Axpy( beta, a21, z21 );
1145 // FLA_Scal( inv_tau11, z21 );
1148 m_ahead,
1149 &beta,
1150 a21, rs_A,
1151 z21, rs_Z );
1153 m_ahead,
1154 &inv_tau11,
1155 z21, rs_Z );
1156
1157 // FLA_Dot( a12t, a21, dot_product );
1158 // FLA_Scal( minus_inv_tau11, dot_product );
1159 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1161 m_ahead,
1162 a12t, cs_A,
1163 a21, rs_A,
1164 &dot_product );
1167 m_ahead,
1168 &dot_product,
1169 a21, rs_A,
1170 a12t, cs_A );
1171
1172 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1173 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1176 m_behind,
1177 n_ahead,
1178 buff_1,
1179 A02, rs_A, cs_A,
1180 a21, rs_A,
1181 buff_0,
1182 e0, inc_e );
1185 m_behind,
1186 n_ahead,
1188 e0, inc_e,
1189 a21, rs_A,
1190 A02, rs_A, cs_A );
1191
1192 // FLA_Copy( first_elem, a21_t );
1193 *a21_t = first_elem;
1194 }
1195
1196 /*------------------------------------------------------------*/
1197
1198 }
1199
1200 // FLA_Obj_free( &d );
1201 // FLA_Obj_free( &e );
1202 // FLA_Obj_free( &f );
1203 FLA_free( buff_d );
1204 FLA_free( buff_e );
1205 FLA_free( buff_f );
1206
1207 return FLA_SUCCESS;
1208}
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opd_var4()

FLA_Error FLA_Hess_UT_step_opd_var4 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_T,
int  rs_T,
int  cs_T 
)
498{
499 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
500 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
501 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
503
504 double first_elem, last_elem;
505 double dot_product;
506 double beta, conj_beta;
507 double inv_tau11;
508 double minus_inv_tau11;
509 int i;
510
511 // b_alg = FLA_Obj_length( T );
512 int b_alg = m_T;
513
514 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
515 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
516 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
517 double* buff_d = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
518 double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
519 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
520 int inc_d = 1;
521 int inc_e = 1;
522 int inc_f = 1;
523
524 // FLA_Set( FLA_ZERO, Y );
525 // FLA_Set( FLA_ZERO, Z );
526 bl1_dsetm( m_A,
527 b_alg,
528 buff_0,
529 buff_Y, rs_Y, cs_Y );
530 bl1_dsetm( m_A,
531 b_alg,
532 buff_0,
533 buff_Z, rs_Z, cs_Z );
534
535 for ( i = 0; i < b_alg; ++i )
536 {
537 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
538 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
539 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
540 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
541 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
542 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
543 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
544
545 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
546 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
547 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
548
549 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
550 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
551 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
552
553 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
554 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
555
556 double* d0 = buff_d + (0 )*inc_d;
557
558 double* e0 = buff_e + (0 )*inc_e;
559
560 double* f0 = buff_f + (0 )*inc_f;
561
562 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
563
564 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
565 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
566
567 double* ABL = a10t;
568 double* ZBL = z10t;
569
570 double* a2 = alpha11;
571
572 int m_ahead = m_A - i - 1;
573 int n_ahead = m_A - i - 1;
574 int m_behind = i;
575 int n_behind = i;
576
577 /*------------------------------------------------------------*/
578
579 if ( m_behind > 0 )
580 {
581 // FLA_Copy( a10t_r, last_elem );
582 // FLA_Set( FLA_ONE, a10t_r );
583 last_elem = *a10t_r;
584 *a10t_r = *buff_1;
585 }
586
587 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
588 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
591 m_ahead + 1,
592 n_behind,
593 buff_m1,
594 ABL, rs_A, cs_A,
595 y10t, cs_Y,
596 buff_1,
597 a2, rs_A );
600 m_ahead + 1,
601 n_behind,
602 buff_m1,
603 ZBL, rs_Z, cs_Z,
604 a10t, cs_A,
605 buff_1,
606 a2, rs_A );
607
608 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
609 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
612 m_ahead,
613 n_behind,
614 buff_m1,
615 Y20, rs_Y, cs_Y,
616 a10t, cs_A,
617 buff_1,
618 a12t, cs_A );
621 m_ahead,
622 n_behind,
623 buff_m1,
624 A20, rs_A, cs_A,
625 z10t, cs_Z,
626 buff_1,
627 a12t, cs_A );
628
629 if ( m_behind > 0 )
630 {
631 // FLA_Copy( last_elem, a10t_r );
632 *a10t_r = last_elem;
633 }
634
635 if ( m_ahead > 0 )
636 {
637 // FLA_Househ2_UT( FLA_LEFT,
638 // a21_t,
639 // a21_b, tau11 );
641 a21_t,
642 a21_b, rs_A,
643 tau11 );
644
645 // FLA_Set( FLA_ONE, inv_tau11 );
646 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
647 // FLA_Copy( inv_tau11, minus_inv_tau11 );
648 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
651
652 // FLA_Copy( a21_t, first_elem );
653 // FLA_Set( FLA_ONE, a21_t );
654 first_elem = *a21_t;
655 *a21_t = *buff_1;
656
657 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
660 m_ahead,
661 n_ahead,
662 buff_1,
663 A22, rs_A, cs_A,
664 a21, rs_A,
665 buff_0,
666 y21, rs_Y );
667
668 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
671 m_ahead,
672 n_ahead,
673 buff_1,
674 A22, rs_A, cs_A,
675 a21, rs_A,
676 buff_0,
677 z21, rs_Z );
678
679 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
680 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
681 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
684 m_ahead,
685 n_behind,
686 buff_1,
687 A20, rs_A, cs_A,
688 a21, rs_A,
689 buff_0,
690 d0, inc_d );
693 m_ahead,
694 n_behind,
695 buff_1,
696 Y20, rs_Y, cs_Y,
697 a21, rs_A,
698 buff_0,
699 e0, inc_e );
702 m_ahead,
703 n_behind,
704 buff_1,
705 Z20, rs_Z, cs_Z,
706 a21, rs_A,
707 buff_0,
708 f0, inc_f );
709
710 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
711 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
714 m_ahead,
715 n_behind,
716 buff_m1,
717 Y20, rs_Y, cs_Y,
718 d0, inc_d,
719 buff_1,
720 y21, rs_Y );
723 m_ahead,
724 n_behind,
725 buff_m1,
726 A20, rs_A, cs_A,
727 f0, inc_f,
728 buff_1,
729 y21, rs_Y );
730
731 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
732 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
735 m_ahead,
736 n_behind,
737 buff_m1,
738 A20, rs_A, cs_A,
739 e0, inc_e,
740 buff_1,
741 z21, rs_Z );
744 m_ahead,
745 n_behind,
746 buff_m1,
747 Z20, rs_Z, cs_Z,
748 d0, inc_d,
749 buff_1,
750 z21, rs_Z );
751
752 // FLA_Copy( d0, t01 );
754 n_behind,
755 d0, inc_d,
756 t01, rs_T );
757
758 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
759 // FLA_Inv_scal( FLA_TWO, beta );
760 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
762 m_ahead,
763 a21, rs_A,
764 z21, rs_Z,
765 &beta );
768
769 // FLA_Scal( minus_inv_tau11, conj_beta );
770 // FLA_Axpy( conj_beta, a21, y21 );
771 // FLA_Scal( inv_tau11, y21 );
774 m_ahead,
775 &conj_beta,
776 a21, rs_A,
777 y21, rs_Y );
779 m_ahead,
780 &inv_tau11,
781 y21, rs_Y );
782
783 // FLA_Scal( minus_inv_tau11, beta );
784 // FLA_Axpy( beta, a21, z21 );
785 // FLA_Scal( inv_tau11, z21 );
788 m_ahead,
789 &beta,
790 a21, rs_A,
791 z21, rs_Z );
793 m_ahead,
794 &inv_tau11,
795 z21, rs_Z );
796
797 // FLA_Dot( a12t, a21, dot_product );
798 // FLA_Scal( minus_inv_tau11, dot_product );
799 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
801 m_ahead,
802 a12t, cs_A,
803 a21, rs_A,
804 &dot_product );
807 m_ahead,
809 a21, rs_A,
810 a12t, cs_A );
811
812 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
813 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
816 m_behind,
817 n_ahead,
818 buff_1,
819 A02, rs_A, cs_A,
820 a21, rs_A,
821 buff_0,
822 e0, inc_e );
825 m_behind,
826 n_ahead,
828 e0, inc_e,
829 a21, rs_A,
830 A02, rs_A, cs_A );
831
832 // FLA_Copy( first_elem, a21_t );
833 *a21_t = first_elem;
834 }
835
836 /*------------------------------------------------------------*/
837
838 }
839
840 // FLA_Obj_free( &d );
841 // FLA_Obj_free( &e );
842 // FLA_Obj_free( &f );
843 FLA_free( buff_d );
844 FLA_free( buff_e );
845 FLA_free( buff_f );
846
847 return FLA_SUCCESS;
848}
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_ops_var4()

FLA_Error FLA_Hess_UT_step_ops_var4 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T 
)
138{
139 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
143
144 float first_elem, last_elem;
145 float dot_product;
146 float beta, conj_beta;
147 float inv_tau11;
148 float minus_inv_tau11;
149 int i;
150
151 // b_alg = FLA_Obj_length( T );
152 int b_alg = m_T;
153
154 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157 float* buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158 float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
159 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
160 int inc_d = 1;
161 int inc_e = 1;
162 int inc_f = 1;
163
164 // FLA_Set( FLA_ZERO, Y );
165 // FLA_Set( FLA_ZERO, Z );
166 bl1_ssetm( m_A,
167 b_alg,
168 buff_0,
169 buff_Y, rs_Y, cs_Y );
170 bl1_ssetm( m_A,
171 b_alg,
172 buff_0,
173 buff_Z, rs_Z, cs_Z );
174
175 for ( i = 0; i < b_alg; ++i )
176 {
177 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
178 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
179 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
182 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
183 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
184
185 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
186 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
187 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
188
189 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
190 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
191 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
192
193 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
194 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
195
196 float* d0 = buff_d + (0 )*inc_d;
197
198 float* e0 = buff_e + (0 )*inc_e;
199
200 float* f0 = buff_f + (0 )*inc_f;
201
202 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
203
204 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
205 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
206
207 float* ABL = a10t;
208 float* ZBL = z10t;
209
210 float* a2 = alpha11;
211
212 int m_ahead = m_A - i - 1;
213 int n_ahead = m_A - i - 1;
214 int m_behind = i;
215 int n_behind = i;
216
217 /*------------------------------------------------------------*/
218
219 if ( m_behind > 0 )
220 {
221 // FLA_Copy( a10t_r, last_elem );
222 // FLA_Set( FLA_ONE, a10t_r );
223 last_elem = *a10t_r;
224 *a10t_r = *buff_1;
225 }
226
227 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
228 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
231 m_ahead + 1,
232 n_behind,
233 buff_m1,
234 ABL, rs_A, cs_A,
235 y10t, cs_Y,
236 buff_1,
237 a2, rs_A );
240 m_ahead + 1,
241 n_behind,
242 buff_m1,
243 ZBL, rs_Z, cs_Z,
244 a10t, cs_A,
245 buff_1,
246 a2, rs_A );
247
248 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
249 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
252 m_ahead,
253 n_behind,
254 buff_m1,
255 Y20, rs_Y, cs_Y,
256 a10t, cs_A,
257 buff_1,
258 a12t, cs_A );
261 m_ahead,
262 n_behind,
263 buff_m1,
264 A20, rs_A, cs_A,
265 z10t, cs_Z,
266 buff_1,
267 a12t, cs_A );
268
269 if ( m_behind > 0 )
270 {
271 // FLA_Copy( last_elem, a10t_r );
272 *a10t_r = last_elem;
273 }
274
275 if ( m_ahead > 0 )
276 {
277 // FLA_Househ2_UT( FLA_LEFT,
278 // a21_t,
279 // a21_b, tau11 );
281 a21_t,
282 a21_b, rs_A,
283 tau11 );
284
285 // FLA_Set( FLA_ONE, inv_tau11 );
286 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
287 // FLA_Copy( inv_tau11, minus_inv_tau11 );
288 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
291
292 // FLA_Copy( a21_t, first_elem );
293 // FLA_Set( FLA_ONE, a21_t );
294 first_elem = *a21_t;
295 *a21_t = *buff_1;
296
297 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
300 m_ahead,
301 n_ahead,
302 buff_1,
303 A22, rs_A, cs_A,
304 a21, rs_A,
305 buff_0,
306 y21, rs_Y );
307
308 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
311 m_ahead,
312 n_ahead,
313 buff_1,
314 A22, rs_A, cs_A,
315 a21, rs_A,
316 buff_0,
317 z21, rs_Z );
318
319 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
320 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
321 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
324 m_ahead,
325 n_behind,
326 buff_1,
327 A20, rs_A, cs_A,
328 a21, rs_A,
329 buff_0,
330 d0, inc_d );
333 m_ahead,
334 n_behind,
335 buff_1,
336 Y20, rs_Y, cs_Y,
337 a21, rs_A,
338 buff_0,
339 e0, inc_e );
342 m_ahead,
343 n_behind,
344 buff_1,
345 Z20, rs_Z, cs_Z,
346 a21, rs_A,
347 buff_0,
348 f0, inc_f );
349
350 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
351 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
354 m_ahead,
355 n_behind,
356 buff_m1,
357 Y20, rs_Y, cs_Y,
358 d0, inc_d,
359 buff_1,
360 y21, rs_Y );
363 m_ahead,
364 n_behind,
365 buff_m1,
366 A20, rs_A, cs_A,
367 f0, inc_f,
368 buff_1,
369 y21, rs_Y );
370
371 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
372 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
375 m_ahead,
376 n_behind,
377 buff_m1,
378 A20, rs_A, cs_A,
379 e0, inc_e,
380 buff_1,
381 z21, rs_Z );
384 m_ahead,
385 n_behind,
386 buff_m1,
387 Z20, rs_Z, cs_Z,
388 d0, inc_d,
389 buff_1,
390 z21, rs_Z );
391
392 // FLA_Copy( d0, t01 );
394 n_behind,
395 d0, inc_d,
396 t01, rs_T );
397
398 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
399 // FLA_Inv_scal( FLA_TWO, beta );
400 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
402 m_ahead,
403 a21, rs_A,
404 z21, rs_Z,
405 &beta );
408
409 // FLA_Scal( minus_inv_tau11, conj_beta );
410 // FLA_Axpy( conj_beta, a21, y21 );
411 // FLA_Scal( inv_tau11, y21 );
414 m_ahead,
415 &conj_beta,
416 a21, rs_A,
417 y21, rs_Y );
419 m_ahead,
420 &inv_tau11,
421 y21, rs_Y );
422
423 // FLA_Scal( minus_inv_tau11, beta );
424 // FLA_Axpy( beta, a21, z21 );
425 // FLA_Scal( inv_tau11, z21 );
428 m_ahead,
429 &beta,
430 a21, rs_A,
431 z21, rs_Z );
433 m_ahead,
434 &inv_tau11,
435 z21, rs_Z );
436
437 // FLA_Dot( a12t, a21, dot_product );
438 // FLA_Scal( minus_inv_tau11, dot_product );
439 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
441 m_ahead,
442 a12t, cs_A,
443 a21, rs_A,
444 &dot_product );
447 m_ahead,
449 a21, rs_A,
450 a12t, cs_A );
451
452 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
453 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
456 m_behind,
457 n_ahead,
458 buff_1,
459 A02, rs_A, cs_A,
460 a21, rs_A,
461 buff_0,
462 e0, inc_e );
465 m_behind,
466 n_ahead,
468 e0, inc_e,
469 a21, rs_A,
470 A02, rs_A, cs_A );
471
472 // FLA_Copy( first_elem, a21_t );
473 *a21_t = first_elem;
474 }
475
476 /*------------------------------------------------------------*/
477
478 }
479
480 // FLA_Obj_free( &d );
481 // FLA_Obj_free( &e );
482 // FLA_Obj_free( &f );
483 FLA_free( buff_d );
484 FLA_free( buff_e );
485 FLA_free( buff_f );
486
487 return FLA_SUCCESS;
488}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opt_var4()

FLA_Error FLA_Hess_UT_step_opt_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Datatype datatype;
32 int m_A, m_T;
33 int rs_A, cs_A;
34 int rs_Y, cs_Y;
35 int rs_Z, cs_Z;
36 int rs_T, cs_T;
37
38 datatype = FLA_Obj_datatype( A );
39
40 m_A = FLA_Obj_length( A );
41 m_T = FLA_Obj_length( T );
42
45
48
51
54
55
56 switch ( datatype )
57 {
58 case FLA_FLOAT:
59 {
60 float* buff_A = FLA_FLOAT_PTR( A );
61 float* buff_Y = FLA_FLOAT_PTR( Y );
62 float* buff_Z = FLA_FLOAT_PTR( Z );
63 float* buff_T = FLA_FLOAT_PTR( T );
64
66 m_T,
70 buff_T, rs_T, cs_T );
71
72 break;
73 }
74
75 case FLA_DOUBLE:
76 {
77 double* buff_A = FLA_DOUBLE_PTR( A );
78 double* buff_Y = FLA_DOUBLE_PTR( Y );
79 double* buff_Z = FLA_DOUBLE_PTR( Z );
80 double* buff_T = FLA_DOUBLE_PTR( T );
81
83 m_T,
87 buff_T, rs_T, cs_T );
88
89 break;
90 }
91
92 case FLA_COMPLEX:
93 {
98
100 m_T,
101 buff_A, rs_A, cs_A,
102 buff_Y, rs_Y, cs_Y,
103 buff_Z, rs_Z, cs_Z,
104 buff_T, rs_T, cs_T );
105
106 break;
107 }
108
110 {
115
117 m_T,
118 buff_A, rs_A, cs_A,
119 buff_Y, rs_Y, cs_Y,
120 buff_Z, rs_Z, cs_Z,
121 buff_T, rs_T, cs_T );
122
123 break;
124 }
125 }
126
127 return FLA_SUCCESS;
128}
FLA_Error FLA_Hess_UT_step_opc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:852
FLA_Error FLA_Hess_UT_step_opz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:1212
FLA_Error FLA_Hess_UT_step_ops_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:132
FLA_Error FLA_Hess_UT_step_opd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:492
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var4(), and FLA_Hess_UT_opt_var4().

◆ FLA_Hess_UT_step_opz_var4()

FLA_Error FLA_Hess_UT_step_opz_var4 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
1218{
1223
1229 int i;
1230
1231 // b_alg = FLA_Obj_length( T );
1232 int b_alg = m_T;
1233
1234 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
1235 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
1236 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1237 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1238 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1239 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1240 int inc_d = 1;
1241 int inc_e = 1;
1242 int inc_f = 1;
1243
1244 // FLA_Set( FLA_ZERO, Y );
1245 // FLA_Set( FLA_ZERO, Z );
1246 bl1_zsetm( m_A,
1247 b_alg,
1248 buff_0,
1249 buff_Y, rs_Y, cs_Y );
1250 bl1_zsetm( m_A,
1251 b_alg,
1252 buff_0,
1253 buff_Z, rs_Z, cs_Z );
1254
1255 for ( i = 0; i < b_alg; ++i )
1256 {
1257 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1258 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1259 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1260 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1261 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1262 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1263 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1264
1265 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1266 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1267 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1268
1269 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1270 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1271 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1272
1273 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1274 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1275
1276 dcomplex* d0 = buff_d + (0 )*inc_d;
1277
1278 dcomplex* e0 = buff_e + (0 )*inc_e;
1279
1280 dcomplex* f0 = buff_f + (0 )*inc_f;
1281
1282 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
1283
1284 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1285 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1286
1287 dcomplex* ABL = a10t;
1288 dcomplex* ZBL = z10t;
1289
1290 dcomplex* a2 = alpha11;
1291
1292 int m_ahead = m_A - i - 1;
1293 int n_ahead = m_A - i - 1;
1294 int m_behind = i;
1295 int n_behind = i;
1296
1297 /*------------------------------------------------------------*/
1298
1299 if ( m_behind > 0 )
1300 {
1301 // FLA_Copy( a10t_r, last_elem );
1302 // FLA_Set( FLA_ONE, a10t_r );
1303 last_elem = *a10t_r;
1304 *a10t_r = *buff_1;
1305 }
1306
1307 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1308 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
1311 m_ahead + 1,
1312 n_behind,
1313 buff_m1,
1314 ABL, rs_A, cs_A,
1315 y10t, cs_Y,
1316 buff_1,
1317 a2, rs_A );
1320 m_ahead + 1,
1321 n_behind,
1322 buff_m1,
1323 ZBL, rs_Z, cs_Z,
1324 a10t, cs_A,
1325 buff_1,
1326 a2, rs_A );
1327
1328 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1329 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
1332 m_ahead,
1333 n_behind,
1334 buff_m1,
1335 Y20, rs_Y, cs_Y,
1336 a10t, cs_A,
1337 buff_1,
1338 a12t, cs_A );
1341 m_ahead,
1342 n_behind,
1343 buff_m1,
1344 A20, rs_A, cs_A,
1345 z10t, cs_Z,
1346 buff_1,
1347 a12t, cs_A );
1348
1349 if ( m_behind > 0 )
1350 {
1351 // FLA_Copy( last_elem, a10t_r );
1352 *a10t_r = last_elem;
1353 }
1354
1355 if ( m_ahead > 0 )
1356 {
1357 // FLA_Househ2_UT( FLA_LEFT,
1358 // a21_t,
1359 // a21_b, tau11 );
1361 a21_t,
1362 a21_b, rs_A,
1363 tau11 );
1364
1365 // FLA_Set( FLA_ONE, inv_tau11 );
1366 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1367 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1368 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1371
1372 // FLA_Copy( a21_t, first_elem );
1373 // FLA_Set( FLA_ONE, a21_t );
1374 first_elem = *a21_t;
1375 *a21_t = *buff_1;
1376
1377 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1380 m_ahead,
1381 n_ahead,
1382 buff_1,
1383 A22, rs_A, cs_A,
1384 a21, rs_A,
1385 buff_0,
1386 y21, rs_Y );
1387
1388 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1391 m_ahead,
1392 n_ahead,
1393 buff_1,
1394 A22, rs_A, cs_A,
1395 a21, rs_A,
1396 buff_0,
1397 z21, rs_Z );
1398
1399 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1400 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1401 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1404 m_ahead,
1405 n_behind,
1406 buff_1,
1407 A20, rs_A, cs_A,
1408 a21, rs_A,
1409 buff_0,
1410 d0, inc_d );
1413 m_ahead,
1414 n_behind,
1415 buff_1,
1416 Y20, rs_Y, cs_Y,
1417 a21, rs_A,
1418 buff_0,
1419 e0, inc_e );
1422 m_ahead,
1423 n_behind,
1424 buff_1,
1425 Z20, rs_Z, cs_Z,
1426 a21, rs_A,
1427 buff_0,
1428 f0, inc_f );
1429
1430 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1431 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1434 m_ahead,
1435 n_behind,
1436 buff_m1,
1437 Y20, rs_Y, cs_Y,
1438 d0, inc_d,
1439 buff_1,
1440 y21, rs_Y );
1443 m_ahead,
1444 n_behind,
1445 buff_m1,
1446 A20, rs_A, cs_A,
1447 f0, inc_f,
1448 buff_1,
1449 y21, rs_Y );
1450
1451 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1452 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1455 m_ahead,
1456 n_behind,
1457 buff_m1,
1458 A20, rs_A, cs_A,
1459 e0, inc_e,
1460 buff_1,
1461 z21, rs_Z );
1464 m_ahead,
1465 n_behind,
1466 buff_m1,
1467 Z20, rs_Z, cs_Z,
1468 d0, inc_d,
1469 buff_1,
1470 z21, rs_Z );
1471
1472 // FLA_Copy( d0, t01 );
1474 n_behind,
1475 d0, inc_d,
1476 t01, rs_T );
1477
1478 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1479 // FLA_Inv_scal( FLA_TWO, beta );
1480 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1482 m_ahead,
1483 a21, rs_A,
1484 z21, rs_Z,
1485 &beta );
1488
1489 // FLA_Scal( minus_inv_tau11, conj_beta );
1490 // FLA_Axpy( conj_beta, a21, y21 );
1491 // FLA_Scal( inv_tau11, y21 );
1494 m_ahead,
1495 &conj_beta,
1496 a21, rs_A,
1497 y21, rs_Y );
1499 m_ahead,
1500 &inv_tau11,
1501 y21, rs_Y );
1502
1503 // FLA_Scal( minus_inv_tau11, beta );
1504 // FLA_Axpy( beta, a21, z21 );
1505 // FLA_Scal( inv_tau11, z21 );
1508 m_ahead,
1509 &beta,
1510 a21, rs_A,
1511 z21, rs_Z );
1513 m_ahead,
1514 &inv_tau11,
1515 z21, rs_Z );
1516
1517 // FLA_Dot( a12t, a21, dot_product );
1518 // FLA_Scal( minus_inv_tau11, dot_product );
1519 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1521 m_ahead,
1522 a12t, cs_A,
1523 a21, rs_A,
1524 &dot_product );
1527 m_ahead,
1528 &dot_product,
1529 a21, rs_A,
1530 a12t, cs_A );
1531
1532 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1533 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1536 m_behind,
1537 n_ahead,
1538 buff_1,
1539 A02, rs_A, cs_A,
1540 a21, rs_A,
1541 buff_0,
1542 e0, inc_e );
1545 m_behind,
1546 n_ahead,
1548 e0, inc_e,
1549 a21, rs_A,
1550 A02, rs_A, cs_A );
1551
1552 // FLA_Copy( first_elem, a21_t );
1553 *a21_t = first_elem;
1554 }
1555
1556 /*------------------------------------------------------------*/
1557
1558 }
1559
1560 // FLA_Obj_free( &d );
1561 // FLA_Obj_free( &e );
1562 // FLA_Obj_free( &f );
1563 FLA_free( buff_d );
1564 FLA_free( buff_e );
1565 FLA_free( buff_f );
1566
1567 return FLA_SUCCESS;
1568}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().