libflame revision_anchor
Functions
FLA_Hess_UT_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_opt_var3()

FLA_Error FLA_Hess_UT_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_opt_var3( A, T );
16}
FLA_Error FLA_Hess_UT_step_opt_var3(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_opt_var3.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Hess_UT_step_opt_var3(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_step_opc_var3()

FLA_Error FLA_Hess_UT_step_opc_var3 ( int  m_A,
int  m_T,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_T,
int  rs_T,
int  cs_T 
)
807{
812
821 int i;
822
823 // b_alg = FLA_Obj_length( T );
824 int b_alg = m_T;
825
826 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
827 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
828 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
829 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
830 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
831 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
832 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
833 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
834 scomplex* buff_v = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
835 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
836 int inc_u = 1;
837 int inc_y = 1;
838 int inc_z = 1;
839 int inc_v = 1;
840 int inc_w = 1;
841
842 // Initialize some variables (only to prevent compiler warnings).
845
846 for ( i = 0; i < b_alg; ++i )
847 {
848 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
849 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
850 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
851 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
852 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
853 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
854
855 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
856 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
857
859 scomplex* u2 = buff_u + (i+1)*inc_u;
860
861 scomplex* y0 = buff_y + (0 )*inc_y;
862 scomplex* psi1 = buff_y + (i )*inc_y;
863 scomplex* y2 = buff_y + (i+1)*inc_y;
864
865 scomplex* zeta1 = buff_z + (i )*inc_z;
866 scomplex* z2 = buff_z + (i+1)*inc_z;
867
868 scomplex* v2 = buff_v + (i+1)*inc_v;
869
870 scomplex* w2 = buff_w + (i+1)*inc_w;
871
872 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
873 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
874
875 int m_ahead = m_A - i - 1;
876 int n_ahead = m_A - i - 1;
877 int m_behind = i;
878 int n_behind = i;
879
880 /*------------------------------------------------------------*/
881
882 if ( m_behind > 0 )
883 {
884 // FLA_Copy( upsilon1, minus_upsilon1 );
885 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
886 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
889
890 // FLA_Copy( psi1, minus_psi1 );
891 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
892 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
895
896 // FLA_Copy( zeta1, minus_zeta1 );
897 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
899
900 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
901 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
903 1,
905 psi1, 1,
906 alpha11, 1 );
908 1,
910 upsilon1, 1,
911 alpha11, 1 );
912
913 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
914 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
916 m_ahead,
918 y2, inc_y,
919 a12t, cs_A );
921 m_ahead,
923 u2, inc_u,
924 a12t, cs_A );
925
926 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
927 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
929 m_ahead,
931 u2, inc_u,
932 a21, rs_A );
934 m_ahead,
936 z2, inc_z,
937 a21, rs_A );
938 }
939
940 if ( m_ahead > 0 )
941 {
942 // FLA_Househ2_UT( FLA_LEFT,
943 // a21_t,
944 // a21_b, tau11 );
946 a21_t,
947 a21_b, rs_A,
948 tau11 );
949
950 // FLA_Set( FLA_ONE, inv_tau11 );
951 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
952 // FLA_Copy( inv_tau11, minus_inv_tau11 );
953 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
956
957 // FLA_Copy( a21_t, first_elem );
958 // FLA_Set( FLA_ONE, a21_t );
959 first_elem = *a21_t;
960 *a21_t = *buff_1;
961 }
962
963 if ( m_behind > 0 )
964 {
965 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
966 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
969 m_ahead,
970 n_ahead,
971 buff_m1,
972 u2, inc_u,
973 y2, inc_y,
974 A22, rs_A, cs_A );
977 m_ahead,
978 n_ahead,
979 buff_m1,
980 z2, inc_z,
981 u2, inc_u,
982 A22, rs_A, cs_A );
983 }
984
985 if ( m_ahead > 0 )
986 {
987 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
990 m_ahead,
991 n_ahead,
992 buff_1,
993 A22, rs_A, cs_A,
994 a21, rs_A,
995 buff_0,
996 v2, inc_v );
997
998 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1001 m_ahead,
1002 n_ahead,
1003 buff_1,
1004 A22, rs_A, cs_A,
1005 a21, rs_A,
1006 buff_0,
1007 w2, inc_w );
1008
1009 // FLA_Copy( a21, u2 );
1010 // FLA_Copy( v2, y2 );
1011 // FLA_Copy( w2, z2 );
1013 m_ahead,
1014 a21, rs_A,
1015 u2, inc_u );
1017 m_ahead,
1018 v2, inc_v,
1019 y2, inc_y );
1021 m_ahead,
1022 w2, inc_w,
1023 z2, inc_z );
1024
1025 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1026 // FLA_Inv_scal( FLA_TWO, beta );
1027 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1029 m_ahead,
1030 a21, rs_A,
1031 z2, inc_z,
1032 &beta );
1035
1036 // FLA_Scal( minus_inv_tau11, conj_beta );
1037 // FLA_Axpy( conj_beta, a21, y2 );
1038 // FLA_Scal( inv_tau11, y2 );
1041 m_ahead,
1042 &conj_beta,
1043 a21, rs_A,
1044 y2, inc_y );
1046 m_ahead,
1047 &inv_tau11,
1048 y2, inc_y );
1049
1050 // FLA_Scal( minus_inv_tau11, beta );
1051 // FLA_Axpy( beta, a21, z2 );
1052 // FLA_Scal( inv_tau11, z2 );
1055 m_ahead,
1056 &beta,
1057 a21, rs_A,
1058 z2, inc_z );
1060 m_ahead,
1061 &inv_tau11,
1062 z2, inc_z );
1063
1064 // FLA_Dot( a12t, a21, dot_product );
1065 // FLA_Scal( minus_inv_tau11, dot_product );
1066 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1068 m_ahead,
1069 a12t, cs_A,
1070 a21, rs_A,
1071 &dot_product );
1074 m_ahead,
1075 &dot_product,
1076 a21, rs_A,
1077 a12t, cs_A );
1078
1079 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1080 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1083 m_behind,
1084 n_ahead,
1085 buff_1,
1086 A02, rs_A, cs_A,
1087 a21, rs_A,
1088 buff_0,
1089 y0, inc_y );
1092 m_behind,
1093 n_ahead,
1095 y0, inc_y,
1096 a21, rs_A,
1097 A02, rs_A, cs_A );
1098
1099 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1102 m_ahead,
1103 n_behind,
1104 buff_1,
1105 A20, rs_A, cs_A,
1106 a21, rs_A,
1107 buff_0,
1108 t01, rs_T );
1109
1110 // FLA_Copy( first_elem, a21_t );
1111 *a21_t = first_elem;
1112 }
1113
1114 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1115 {
1116 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1117 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1120 m_ahead,
1121 n_ahead,
1122 buff_m1,
1123 u2, inc_u,
1124 y2, inc_y,
1125 A22, rs_A, cs_A );
1128 m_ahead,
1129 n_ahead,
1130 buff_m1,
1131 z2, inc_z,
1132 u2, inc_u,
1133 A22, rs_A, cs_A );
1134 }
1135
1136 /*------------------------------------------------------------*/
1137
1138 }
1139
1140 // FLA_Obj_free( &u );
1141 // FLA_Obj_free( &y );
1142 // FLA_Obj_free( &z );
1143 // FLA_Obj_free( &v );
1144 // FLA_Obj_free( &w );
1145 FLA_free( buff_u );
1146 FLA_free( buff_y );
1147 FLA_free( buff_z );
1148 FLA_free( buff_v );
1149 FLA_free( buff_w );
1150
1151 return FLA_SUCCESS;
1152}
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
double *restrict zeta1
Definition bl1_axmyv2.c:142
double *restrict psi1
Definition bl1_axmyv2.c:141
upsilon1
Definition bl1_axpyv2bdotaxpy.c:225
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opd_var3()

FLA_Error FLA_Hess_UT_step_opd_var3 ( int  m_A,
int  m_T,
double *buff_A,
int  rs_A,
int  cs_A,
double *buff_T,
int  rs_T,
int  cs_T 
)
454{
455 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
456 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
457 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
459
460 double first_elem;
461 double dot_product;
462 double beta, conj_beta;
463 double inv_tau11;
464 double minus_inv_tau11;
467 double minus_zeta1;
468 int i;
469
470 // b_alg = FLA_Obj_length( T );
471 int b_alg = m_T;
472
473 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
474 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
475 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
476 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
477 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
478 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
479 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
480 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
481 double* buff_v = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
482 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
483 int inc_u = 1;
484 int inc_y = 1;
485 int inc_z = 1;
486 int inc_v = 1;
487 int inc_w = 1;
488
489 // Initialize some variables (only to prevent compiler warnings).
492
493 for ( i = 0; i < b_alg; ++i )
494 {
495 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
496 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
497 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
498 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
499 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
500 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
501
502 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504
505 double* upsilon1 = buff_u + (i )*inc_u;
506 double* u2 = buff_u + (i+1)*inc_u;
507
508 double* y0 = buff_y + (0 )*inc_y;
509 double* psi1 = buff_y + (i )*inc_y;
510 double* y2 = buff_y + (i+1)*inc_y;
511
512 double* zeta1 = buff_z + (i )*inc_z;
513 double* z2 = buff_z + (i+1)*inc_z;
514
515 double* v2 = buff_v + (i+1)*inc_v;
516
517 double* w2 = buff_w + (i+1)*inc_w;
518
519 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
520 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
521
522 int m_ahead = m_A - i - 1;
523 int n_ahead = m_A - i - 1;
524 int m_behind = i;
525 int n_behind = i;
526
527 /*------------------------------------------------------------*/
528
529 if ( m_behind > 0 )
530 {
531 // FLA_Copy( upsilon1, minus_upsilon1 );
532 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
533 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
536
537 // FLA_Copy( psi1, minus_psi1 );
538 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
539 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
542
543 // FLA_Copy( zeta1, minus_zeta1 );
544 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
546
547 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
548 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
550 1,
552 psi1, 1,
553 alpha11, 1 );
555 1,
557 upsilon1, 1,
558 alpha11, 1 );
559
560 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
561 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
563 m_ahead,
565 y2, inc_y,
566 a12t, cs_A );
568 m_ahead,
570 u2, inc_u,
571 a12t, cs_A );
572
573 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
574 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
576 m_ahead,
578 u2, inc_u,
579 a21, rs_A );
581 m_ahead,
583 z2, inc_z,
584 a21, rs_A );
585 }
586
587 if ( m_ahead > 0 )
588 {
589 // FLA_Househ2_UT( FLA_LEFT,
590 // a21_t,
591 // a21_b, tau11 );
593 a21_t,
594 a21_b, rs_A,
595 tau11 );
596
597 // FLA_Set( FLA_ONE, inv_tau11 );
598 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
599 // FLA_Copy( inv_tau11, minus_inv_tau11 );
600 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
603
604 // FLA_Copy( a21_t, first_elem );
605 // FLA_Set( FLA_ONE, a21_t );
606 first_elem = *a21_t;
607 *a21_t = *buff_1;
608 }
609
610 if ( m_behind > 0 )
611 {
612 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
613 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
616 m_ahead,
617 n_ahead,
618 buff_m1,
619 u2, inc_u,
620 y2, inc_y,
621 A22, rs_A, cs_A );
624 m_ahead,
625 n_ahead,
626 buff_m1,
627 z2, inc_z,
628 u2, inc_u,
629 A22, rs_A, cs_A );
630 }
631
632 if ( m_ahead > 0 )
633 {
634 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
637 m_ahead,
638 n_ahead,
639 buff_1,
640 A22, rs_A, cs_A,
641 a21, rs_A,
642 buff_0,
643 v2, inc_v );
644
645 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
648 m_ahead,
649 n_ahead,
650 buff_1,
651 A22, rs_A, cs_A,
652 a21, rs_A,
653 buff_0,
654 w2, inc_w );
655
656 // FLA_Copy( a21, u2 );
657 // FLA_Copy( v2, y2 );
658 // FLA_Copy( w2, z2 );
660 m_ahead,
661 a21, rs_A,
662 u2, inc_u );
664 m_ahead,
665 v2, inc_v,
666 y2, inc_y );
668 m_ahead,
669 w2, inc_w,
670 z2, inc_z );
671
672 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
673 // FLA_Inv_scal( FLA_TWO, beta );
674 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
676 m_ahead,
677 a21, rs_A,
678 z2, inc_z,
679 &beta );
682
683 // FLA_Scal( minus_inv_tau11, conj_beta );
684 // FLA_Axpy( conj_beta, a21, y2 );
685 // FLA_Scal( inv_tau11, y2 );
688 m_ahead,
689 &conj_beta,
690 a21, rs_A,
691 y2, inc_y );
693 m_ahead,
694 &inv_tau11,
695 y2, inc_y );
696
697 // FLA_Scal( minus_inv_tau11, beta );
698 // FLA_Axpy( beta, a21, z2 );
699 // FLA_Scal( inv_tau11, z2 );
702 m_ahead,
703 &beta,
704 a21, rs_A,
705 z2, inc_z );
707 m_ahead,
708 &inv_tau11,
709 z2, inc_z );
710
711 // FLA_Dot( a12t, a21, dot_product );
712 // FLA_Scal( minus_inv_tau11, dot_product );
713 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
715 m_ahead,
716 a12t, cs_A,
717 a21, rs_A,
718 &dot_product );
721 m_ahead,
723 a21, rs_A,
724 a12t, cs_A );
725
726 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
727 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
730 m_behind,
731 n_ahead,
732 buff_1,
733 A02, rs_A, cs_A,
734 a21, rs_A,
735 buff_0,
736 y0, inc_y );
739 m_behind,
740 n_ahead,
742 y0, inc_y,
743 a21, rs_A,
744 A02, rs_A, cs_A );
745
746 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
749 m_ahead,
750 n_behind,
751 buff_1,
752 A20, rs_A, cs_A,
753 a21, rs_A,
754 buff_0,
755 t01, rs_T );
756
757 // FLA_Copy( first_elem, a21_t );
758 *a21_t = first_elem;
759 }
760
761 if ( m_behind + 1 == b_alg && m_ahead > 0 )
762 {
763 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
764 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
767 m_ahead,
768 n_ahead,
769 buff_m1,
770 u2, inc_u,
771 y2, inc_y,
772 A22, rs_A, cs_A );
775 m_ahead,
776 n_ahead,
777 buff_m1,
778 z2, inc_z,
779 u2, inc_u,
780 A22, rs_A, cs_A );
781 }
782
783 /*------------------------------------------------------------*/
784
785 }
786
787 // FLA_Obj_free( &u );
788 // FLA_Obj_free( &y );
789 // FLA_Obj_free( &z );
790 // FLA_Obj_free( &v );
791 // FLA_Obj_free( &w );
792 FLA_free( buff_u );
793 FLA_free( buff_y );
794 FLA_free( buff_z );
795 FLA_free( buff_v );
796 FLA_free( buff_w );
797
798 return FLA_SUCCESS;
799}
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_ops_var3()

FLA_Error FLA_Hess_UT_step_ops_var3 ( int  m_A,
int  m_T,
float *buff_A,
int  rs_A,
int  cs_A,
float *buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
114 float minus_zeta1;
115 int i;
116
117 // b_alg = FLA_Obj_length( T );
118 int b_alg = m_T;
119
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
121 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
122 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
123 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
124 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
125 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
126 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
127 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
128 float* buff_v = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
129 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
130 int inc_u = 1;
131 int inc_y = 1;
132 int inc_z = 1;
133 int inc_v = 1;
134 int inc_w = 1;
135
136 // Initialize some variables (only to prevent compiler warnings).
139
140 for ( i = 0; i < b_alg; ++i )
141 {
142 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
143 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
144 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
145 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
146 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
147 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
148
149 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
150 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
151
152 float* upsilon1 = buff_u + (i )*inc_u;
153 float* u2 = buff_u + (i+1)*inc_u;
154
155 float* y0 = buff_y + (0 )*inc_y;
156 float* psi1 = buff_y + (i )*inc_y;
157 float* y2 = buff_y + (i+1)*inc_y;
158
159 float* zeta1 = buff_z + (i )*inc_z;
160 float* z2 = buff_z + (i+1)*inc_z;
161
162 float* v2 = buff_v + (i+1)*inc_v;
163
164 float* w2 = buff_w + (i+1)*inc_w;
165
166 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
167 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
168
169 int m_ahead = m_A - i - 1;
170 int n_ahead = m_A - i - 1;
171 int m_behind = i;
172 int n_behind = i;
173
174 /*------------------------------------------------------------*/
175
176 if ( m_behind > 0 )
177 {
178 // FLA_Copy( upsilon1, minus_upsilon1 );
179 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
180 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
183
184 // FLA_Copy( psi1, minus_psi1 );
185 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
186 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
189
190 // FLA_Copy( zeta1, minus_zeta1 );
191 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
193
194 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
195 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
197 1,
199 psi1, 1,
200 alpha11, 1 );
202 1,
204 upsilon1, 1,
205 alpha11, 1 );
206
207 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
208 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
210 m_ahead,
212 y2, inc_y,
213 a12t, cs_A );
215 m_ahead,
217 u2, inc_u,
218 a12t, cs_A );
219
220 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
221 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
223 m_ahead,
225 u2, inc_u,
226 a21, rs_A );
228 m_ahead,
230 z2, inc_z,
231 a21, rs_A );
232 }
233
234 if ( m_ahead > 0 )
235 {
236 // FLA_Househ2_UT( FLA_LEFT,
237 // a21_t,
238 // a21_b, tau11 );
240 a21_t,
241 a21_b, rs_A,
242 tau11 );
243
244 // FLA_Set( FLA_ONE, inv_tau11 );
245 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
246 // FLA_Copy( inv_tau11, minus_inv_tau11 );
247 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
250
251 // FLA_Copy( a21_t, first_elem );
252 // FLA_Set( FLA_ONE, a21_t );
253 first_elem = *a21_t;
254 *a21_t = *buff_1;
255 }
256
257 if ( m_behind > 0 )
258 {
259 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
260 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
263 m_ahead,
264 n_ahead,
265 buff_m1,
266 u2, inc_u,
267 y2, inc_y,
268 A22, rs_A, cs_A );
271 m_ahead,
272 n_ahead,
273 buff_m1,
274 z2, inc_z,
275 u2, inc_u,
276 A22, rs_A, cs_A );
277 }
278
279 if ( m_ahead > 0 )
280 {
281 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
284 m_ahead,
285 n_ahead,
286 buff_1,
287 A22, rs_A, cs_A,
288 a21, rs_A,
289 buff_0,
290 v2, inc_v );
291
292 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
295 m_ahead,
296 n_ahead,
297 buff_1,
298 A22, rs_A, cs_A,
299 a21, rs_A,
300 buff_0,
301 w2, inc_w );
302
303 // FLA_Copy( a21, u2 );
304 // FLA_Copy( v2, y2 );
305 // FLA_Copy( w2, z2 );
307 m_ahead,
308 a21, rs_A,
309 u2, inc_u );
311 m_ahead,
312 v2, inc_v,
313 y2, inc_y );
315 m_ahead,
316 w2, inc_w,
317 z2, inc_z );
318
319 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
320 // FLA_Inv_scal( FLA_TWO, beta );
321 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
323 m_ahead,
324 a21, rs_A,
325 z2, inc_z,
326 &beta );
329
330 // FLA_Scal( minus_inv_tau11, conj_beta );
331 // FLA_Axpy( conj_beta, a21, y2 );
332 // FLA_Scal( inv_tau11, y2 );
335 m_ahead,
336 &conj_beta,
337 a21, rs_A,
338 y2, inc_y );
340 m_ahead,
341 &inv_tau11,
342 y2, inc_y );
343
344 // FLA_Scal( minus_inv_tau11, beta );
345 // FLA_Axpy( beta, a21, z2 );
346 // FLA_Scal( inv_tau11, z2 );
349 m_ahead,
350 &beta,
351 a21, rs_A,
352 z2, inc_z );
354 m_ahead,
355 &inv_tau11,
356 z2, inc_z );
357
358 // FLA_Dot( a12t, a21, dot_product );
359 // FLA_Scal( minus_inv_tau11, dot_product );
360 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
362 m_ahead,
363 a12t, cs_A,
364 a21, rs_A,
365 &dot_product );
368 m_ahead,
370 a21, rs_A,
371 a12t, cs_A );
372
373 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
374 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
377 m_behind,
378 n_ahead,
379 buff_1,
380 A02, rs_A, cs_A,
381 a21, rs_A,
382 buff_0,
383 y0, inc_y );
386 m_behind,
387 n_ahead,
389 y0, inc_y,
390 a21, rs_A,
391 A02, rs_A, cs_A );
392
393 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
396 m_ahead,
397 n_behind,
398 buff_1,
399 A20, rs_A, cs_A,
400 a21, rs_A,
401 buff_0,
402 t01, rs_T );
403
404 // FLA_Copy( first_elem, a21_t );
405 *a21_t = first_elem;
406 }
407
408 if ( m_behind + 1 == b_alg && m_ahead > 0 )
409 {
410 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
411 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
414 m_ahead,
415 n_ahead,
416 buff_m1,
417 u2, inc_u,
418 y2, inc_y,
419 A22, rs_A, cs_A );
422 m_ahead,
423 n_ahead,
424 buff_m1,
425 z2, inc_z,
426 u2, inc_u,
427 A22, rs_A, cs_A );
428 }
429
430 /*------------------------------------------------------------*/
431
432 }
433
434 // FLA_Obj_free( &u );
435 // FLA_Obj_free( &y );
436 // FLA_Obj_free( &z );
437 // FLA_Obj_free( &v );
438 // FLA_Obj_free( &w );
439 FLA_free( buff_u );
440 FLA_free( buff_y );
441 FLA_free( buff_z );
442 FLA_free( buff_v );
443 FLA_free( buff_w );
444
445 return FLA_SUCCESS;
446}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opt_var3()

FLA_Error FLA_Hess_UT_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_opz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:1156
FLA_Error FLA_Hess_UT_step_opd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:450
FLA_Error FLA_Hess_UT_step_opc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:803
FLA_Error FLA_Hess_UT_step_ops_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:97
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var3(), and FLA_Hess_UT_opt_var3().

◆ FLA_Hess_UT_step_opz_var3()

FLA_Error FLA_Hess_UT_step_opz_var3 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
// FLA_Hess_UT_step_opz_var3: dcomplex variant of the variant-3 step routine.
// Operates in place on the matrix at buff_A (row/column strides rs_A/cs_A,
// dimension m_A) and writes into buff_T (strides rs_T/cs_T). Runs m_T
// iterations, using five temporary vectors of m_A elements each that are
// allocated on entry and released before returning FLA_SUCCESS.
// NOTE(review): this is a rendered listing; the declarations of the scalar
// temporaries and constants (buff_0, buff_1, buff_m1, first_elem, beta, ...)
// and the callee name of most BLAS-like calls are on lines missing from it.
// The `// FLA_...` comments preceding each call describe the intended
// operation; confirm exact callees against the original file.
1160{
1165
1174 int i;
1175
1176 // b_alg = FLA_Obj_length( T );
1177 int b_alg = m_T;
1178
1179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
1181 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1182 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
1183 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
// Work vectors u, y, z, v, w: m_A elements each, unit stride.
// NOTE(review): no NULL check on the FLA_malloc results is visible here.
1184 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1185 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1186 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1187 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1188 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1189 int inc_u = 1;
1190 int inc_y = 1;
1191 int inc_z = 1;
1192 int inc_v = 1;
1193 int inc_w = 1;
1194
1195 // Initialize some variables (only to prevent compiler warnings).
1196 first_elem = *buff_0;
1198
1199 for ( i = 0; i < b_alg; ++i )
1200 {
// Views into A around the current diagonal element (i,i):
//   A20     - starts at row i+1, column 0
//   alpha11 - the diagonal element (i,i)
//   a21     - column i, starting at row i+1
//   A02     - starts at row 0, column i+1
//   a12t    - row i, starting at column i+1
//   A22     - starts at (i+1,i+1)
1201 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1202 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1203 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1204 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1205 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1206 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1207
// Views into T: column i from row 0 (t01) and the diagonal element (tau11).
1208 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1209 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1210
// Element i and the tail starting at i+1 of each work vector.
1211 dcomplex* upsilon1 = buff_u + (i )*inc_u;
1212 dcomplex* u2 = buff_u + (i+1)*inc_u;
1213
1214 dcomplex* y0 = buff_y + (0 )*inc_y;
1215 dcomplex* psi1 = buff_y + (i )*inc_y;
1216 dcomplex* y2 = buff_y + (i+1)*inc_y;
1217
1218 dcomplex* zeta1 = buff_z + (i )*inc_z;
1219 dcomplex* z2 = buff_z + (i+1)*inc_z;
1220
1221 dcomplex* v2 = buff_v + (i+1)*inc_v;
1222
1223 dcomplex* w2 = buff_w + (i+1)*inc_w;
1224
// a21 split into its first element (a21_t) and the remainder (a21_b).
1225 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1226 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1227
// Sizes of the parts after (ahead) and before (behind) index i.
1228 int m_ahead = m_A - i - 1;
1229 int n_ahead = m_A - i - 1;
1230 int m_behind = i;
1231 int n_behind = i;
1232
1233 /*------------------------------------------------------------*/
1234
// Skipped on the first iteration (i == 0): updates alpha11, a12t and a21
// using the u/y/z entries at index i and their tails.
1235 if ( m_behind > 0 )
1236 {
1237 // FLA_Copy( upsilon1, minus_upsilon1 );
1238 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
1239 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
1242
1243 // FLA_Copy( psi1, minus_psi1 );
1244 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
1245 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
1248
1249 // FLA_Copy( zeta1, minus_zeta1 );
1250 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
1252
1253 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
1254 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
1256 1,
1258 psi1, 1,
1259 alpha11, 1 );
1261 1,
1262 &minus_zeta1,
1263 upsilon1, 1,
1264 alpha11, 1 );
1265
1266 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
1267 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
1269 m_ahead,
1271 y2, inc_y,
1272 a12t, cs_A );
1274 m_ahead,
1275 &minus_zeta1,
1276 u2, inc_u,
1277 a12t, cs_A );
1278
1279 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
1280 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
1282 m_ahead,
1284 u2, inc_u,
1285 a21, rs_A );
1287 m_ahead,
1289 z2, inc_z,
1290 a21, rs_A );
1291 }
1292
1293 if ( m_ahead > 0 )
1294 {
1295 // FLA_Househ2_UT( FLA_LEFT,
1296 // a21_t,
1297 // a21_b, tau11 );
1299 a21_t,
1300 a21_b, rs_A,
1301 tau11 );
1302
1303 // FLA_Set( FLA_ONE, inv_tau11 );
1304 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1305 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1306 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1309
// Save the first element of a21 and overwrite it with *buff_1 so a21 can be
// used as a full vector below; it is restored at the end of the
// second `m_ahead > 0` block.
1310 // FLA_Copy( a21_t, first_elem );
1311 // FLA_Set( FLA_ONE, a21_t );
1312 first_elem = *a21_t;
1313 *a21_t = *buff_1;
1314 }
1315
// Skipped on the first iteration: two rank-1 updates of A22 scaled by buff_m1.
1316 if ( m_behind > 0 )
1317 {
1318 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1319 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1322 m_ahead,
1323 n_ahead,
1324 buff_m1,
1325 u2, inc_u,
1326 y2, inc_y,
1327 A22, rs_A, cs_A );
1330 m_ahead,
1331 n_ahead,
1332 buff_m1,
1333 z2, inc_z,
1334 u2, inc_u,
1335 A22, rs_A, cs_A );
1336 }
1337
1338 if ( m_ahead > 0 )
1339 {
1340 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1343 m_ahead,
1344 n_ahead,
1345 buff_1,
1346 A22, rs_A, cs_A,
1347 a21, rs_A,
1348 buff_0,
1349 v2, inc_v );
1350
1351 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1354 m_ahead,
1355 n_ahead,
1356 buff_1,
1357 A22, rs_A, cs_A,
1358 a21, rs_A,
1359 buff_0,
1360 w2, inc_w );
1361
1362 // FLA_Copy( a21, u2 );
1363 // FLA_Copy( v2, y2 );
1364 // FLA_Copy( w2, z2 );
1366 m_ahead,
1367 a21, rs_A,
1368 u2, inc_u );
1370 m_ahead,
1371 v2, inc_v,
1372 y2, inc_y );
1374 m_ahead,
1375 w2, inc_w,
1376 z2, inc_z );
1377
1378 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1379 // FLA_Inv_scal( FLA_TWO, beta );
1380 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1382 m_ahead,
1383 a21, rs_A,
1384 z2, inc_z,
1385 &beta );
1388
1389 // FLA_Scal( minus_inv_tau11, conj_beta );
1390 // FLA_Axpy( conj_beta, a21, y2 );
1391 // FLA_Scal( inv_tau11, y2 );
1394 m_ahead,
1395 &conj_beta,
1396 a21, rs_A,
1397 y2, inc_y );
1399 m_ahead,
1400 &inv_tau11,
1401 y2, inc_y );
1402
1403 // FLA_Scal( minus_inv_tau11, beta );
1404 // FLA_Axpy( beta, a21, z2 );
1405 // FLA_Scal( inv_tau11, z2 );
1408 m_ahead,
1409 &beta,
1410 a21, rs_A,
1411 z2, inc_z );
1413 m_ahead,
1414 &inv_tau11,
1415 z2, inc_z );
1416
1417 // FLA_Dot( a12t, a21, dot_product );
1418 // FLA_Scal( minus_inv_tau11, dot_product );
1419 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1421 m_ahead,
1422 a12t, cs_A,
1423 a21, rs_A,
1424 &dot_product );
1427 m_ahead,
1428 &dot_product,
1429 a21, rs_A,
1430 a12t, cs_A );
1431
// y0 (the head of buff_y) is used as scratch for the product with A02,
// which is then updated in place.
1432 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1433 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1436 m_behind,
1437 n_ahead,
1438 buff_1,
1439 A02, rs_A, cs_A,
1440 a21, rs_A,
1441 buff_0,
1442 y0, inc_y );
1445 m_behind,
1446 n_ahead,
1448 y0, inc_y,
1449 a21, rs_A,
1450 A02, rs_A, cs_A );
1451
// Result goes into column i of T above the diagonal (t01, stride rs_T).
1452 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1455 m_ahead,
1456 n_behind,
1457 buff_1,
1458 A20, rs_A, cs_A,
1459 a21, rs_A,
1460 buff_0,
1461 t01, rs_T );
1462
// Put back the element of a21 that was saved before it was overwritten.
1463 // FLA_Copy( first_elem, a21_t );
1464 *a21_t = first_elem;
1465 }
1466
// Last iteration only (i == b_alg - 1): applies the same pair of rank-1
// updates to A22 with the u/y/z tails just computed. NOTE(review): appears to
// flush the update that a following iteration would otherwise apply -- confirm.
1467 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1468 {
1469 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1470 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1473 m_ahead,
1474 n_ahead,
1475 buff_m1,
1476 u2, inc_u,
1477 y2, inc_y,
1478 A22, rs_A, cs_A );
1481 m_ahead,
1482 n_ahead,
1483 buff_m1,
1484 z2, inc_z,
1485 u2, inc_u,
1486 A22, rs_A, cs_A );
1487 }
1488
1489 /*------------------------------------------------------------*/
1490
1491 }
1492
1493 // FLA_Obj_free( &u );
1494 // FLA_Obj_free( &y );
1495 // FLA_Obj_free( &z );
1496 // FLA_Obj_free( &v );
1497 // FLA_Obj_free( &w );
// Release the five work vectors allocated at the top of the function.
1498 FLA_free( buff_u );
1499 FLA_free( buff_y );
1500 FLA_free( buff_z );
1501 FLA_free( buff_v );
1502 FLA_free( buff_w );
1503
1504 return FLA_SUCCESS;
1505}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().