libflame revision_anchor
Functions
FLA_Sylv_hn.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Sylv_hn_blk_var1 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var2 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var3 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var4 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var5 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var6 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var7 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var8 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var9 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var10 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var11 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var12 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var13 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var14 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var15 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var16 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var17 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_blk_var18 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hn_opt_var1 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var2 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var3 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var4 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var5 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var6 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var7 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var8 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var9 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var10 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var11 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var12 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var13 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var14 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var15 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var16 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var17 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_opt_var18 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hn_ops_var1 (float sgn, int m_C, int n_C, float *buff_A, int rs_A, int cs_A, float *buff_B, int rs_B, int cs_B, float *buff_C, int rs_C, int cs_C, float *buff_scale, int *info)
 
FLA_Error FLA_Sylv_hn_opd_var1 (double sgn, int m_C, int n_C, double *buff_A, int rs_A, int cs_A, double *buff_B, int rs_B, int cs_B, double *buff_C, int rs_C, int cs_C, double *buff_scale, int *info)
 
FLA_Error FLA_Sylv_hn_opc_var1 (float sgn, int m_C, int n_C, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_B, int rs_B, int cs_B, scomplex *buff_C, int rs_C, int cs_C, scomplex *buff_scale, int *info)
 
FLA_Error FLA_Sylv_hn_opz_var1 (double sgn, int m_C, int n_C, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_B, int rs_B, int cs_B, dcomplex *buff_C, int rs_C, int cs_C, dcomplex *buff_scale, int *info)
 

Function Documentation

◆ FLA_Sylv_hn_blk_var1()

FLA_Error FLA_Sylv_hn_blk_var1 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL = sylv( ATL', BTL, CTL )
64 // CTR = CTR
65 // CBL = CBL
66 // CBR = CBR
67
68 /*------------------------------------------------------------*/
69
70 // C10 = sylv( A11', B00, C10 - A01' * C00 );
73 FLA_Cntl_sub_gemm1( cntl ) );
74
76 isgn, A11, B00, C10, scale,
77 FLA_Cntl_sub_sylv1( cntl ) );
78
79 // C01 = sylv( A00', B11, C01 -/+ C00 * B01 );
82 FLA_Cntl_sub_gemm2( cntl ) );
83
85 isgn, A00, B11, C01, scale,
86 FLA_Cntl_sub_sylv2( cntl ) );
87
88 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
91 FLA_Cntl_sub_gemm3( cntl ) );
92
95 FLA_Cntl_sub_gemm4( cntl ) );
96
98 isgn, A11, B11, C11, scale,
99 FLA_Cntl_sub_sylv3( cntl ) );
100
101 /*------------------------------------------------------------*/
102
103 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
104 A10, A11, /**/ A12,
105 /* ************** */ /* ****************** */
106 &ABL, /**/ &ABR, A20, A21, /**/ A22,
107 FLA_TL );
108
109 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
110 B10, B11, /**/ B12,
111 /* ************** */ /* ****************** */
112 &BBL, /**/ &BBR, B20, B21, /**/ B22,
113 FLA_TL );
114
115 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
116 C10, C11, /**/ C12,
117 /* ************** */ /* ****************** */
118 &CBL, /**/ &CBR, C20, C21, /**/ C22,
119 FLA_TL );
120
121 }
122
123 return FLA_SUCCESS;
124}
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition FLA_Gemm_internal.c:16
FLA_Error FLA_Sylv_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
Definition FLA_Sylv_internal.c:16
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition FLA_Blocksize.c:234
unsigned long dim_t
Definition FLA_type_defs.h:71
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var10()

FLA_Error FLA_Sylv_hn_blk_var10 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C01 = sylv( A00', B11, C01 );
72 isgn, A00, B11, C01, scale,
73 FLA_Cntl_sub_sylv1( cntl ) );
74
75 // C11 = sylv( A11', B11, C11 - A01' * C01 );
78 FLA_Cntl_sub_gemm1( cntl ) );
79
81 isgn, A11, B11, C11, scale,
82 FLA_Cntl_sub_sylv2( cntl ) );
83
84 // C21 = sylv( A22', B11, C21 - A12' * C11 - A02' * C01 );
87 FLA_Cntl_sub_gemm2( cntl ) );
88
91 FLA_Cntl_sub_gemm3( cntl ) );
92
94 isgn, A22, B11, C21, scale,
95 FLA_Cntl_sub_sylv3( cntl ) );
96
97 // C22 = C22 -/+ C21 * B12;
100 FLA_Cntl_sub_gemm4( cntl ) );
101
102 // C12 = C12 -/+ C11 * B12;
105 FLA_Cntl_sub_gemm5( cntl ) );
106
107 // C02 = C02 -/+ C01 * B12;
110 FLA_Cntl_sub_gemm6( cntl ) );
111
112 /*------------------------------------------------------------*/
113
114 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
115 A10, A11, /**/ A12,
116 /* ************** */ /* ****************** */
117 &ABL, /**/ &ABR, A20, A21, /**/ A22,
118 FLA_TL );
119
120 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
121 B10, B11, /**/ B12,
122 /* ************** */ /* ****************** */
123 &BBL, /**/ &BBR, B20, B21, /**/ B22,
124 FLA_TL );
125
126 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
127 C10, C11, /**/ C12,
128 /* ************** */ /* ****************** */
129 &CBL, /**/ &CBR, C20, C21, /**/ C22,
130 FLA_TL );
131
132 }
133
134 return FLA_SUCCESS;
135}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var11()

FLA_Error FLA_Sylv_hn_blk_var11 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C10 = sylv( A11', B00, C10 );
72 isgn, A11, B00, C10, scale,
73 FLA_Cntl_sub_sylv1( cntl ) );
74
75 // C20 = C20 - A12' * C10;
78 FLA_Cntl_sub_gemm1( cntl ) );
79
80 // C11 = sylv( A11', B11, C11 -/+ C10 * B01 );
83 FLA_Cntl_sub_gemm2( cntl ) );
84
86 isgn, A11, B11, C11, scale,
87 FLA_Cntl_sub_sylv2( cntl ) );
88
89 // C21 = C21 - A12' * C11;
92 FLA_Cntl_sub_gemm3( cntl ) );
93
94 // C12 = sylv( A11', B22, C12 -/+ C10 * B02 -/+ C11 * B12 );
97 FLA_Cntl_sub_gemm4( cntl ) );
98
101 FLA_Cntl_sub_gemm5( cntl ) );
102
104 isgn, A11, B22, C12, scale,
105 FLA_Cntl_sub_sylv3( cntl ) );
106
107 // C22 = C22 - A12' * C12;
110 FLA_Cntl_sub_gemm6( cntl ) );
111
112 /*------------------------------------------------------------*/
113
114 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
115 A10, A11, /**/ A12,
116 /* ************** */ /* ****************** */
117 &ABL, /**/ &ABR, A20, A21, /**/ A22,
118 FLA_TL );
119
120 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
121 B10, B11, /**/ B12,
122 /* ************** */ /* ****************** */
123 &BBL, /**/ &BBR, B20, B21, /**/ B22,
124 FLA_TL );
125
126 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
127 C10, C11, /**/ C12,
128 /* ************** */ /* ****************** */
129 &CBL, /**/ &CBR, C20, C21, /**/ C22,
130 FLA_TL );
131
132 }
133
134 return FLA_SUCCESS;
135}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var12()

FLA_Error FLA_Sylv_hn_blk_var12 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C11 = sylv( A11', B11, C11 - A01' * C01 );
73 FLA_Cntl_sub_gemm1( cntl ) );
74
76 isgn, A11, B11, C11, scale,
77 FLA_Cntl_sub_sylv1( cntl ) );
78
79 // C21 = sylv( A22', B11, C21 - A12' * C11 - A02' * C01 );
82 FLA_Cntl_sub_gemm2( cntl ) );
83
86 FLA_Cntl_sub_gemm3( cntl ) );
87
89 isgn, A22, B11, C21, scale,
90 FLA_Cntl_sub_sylv2( cntl ) );
91
92 // C22 = C22 -/+ C21 * B12;
95 FLA_Cntl_sub_gemm4( cntl ) );
96
97 // C12 = sylv( A11', B22, C12 - A01' * C02 -/+ C11 * B12 );
100 FLA_Cntl_sub_gemm5( cntl ) );
101
104 FLA_Cntl_sub_gemm6( cntl ) );
105
107 isgn, A11, B22, C12, scale,
108 FLA_Cntl_sub_sylv3( cntl ) );
109
110 /*------------------------------------------------------------*/
111
112 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
113 A10, A11, /**/ A12,
114 /* ************** */ /* ****************** */
115 &ABL, /**/ &ABR, A20, A21, /**/ A22,
116 FLA_TL );
117
118 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
119 B10, B11, /**/ B12,
120 /* ************** */ /* ****************** */
121 &BBL, /**/ &BBR, B20, B21, /**/ B22,
122 FLA_TL );
123
124 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
125 C10, C11, /**/ C12,
126 /* ************** */ /* ****************** */
127 &CBL, /**/ &CBR, C20, C21, /**/ C22,
128 FLA_TL );
129
130 }
131
132 return FLA_SUCCESS;
133}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var13()

FLA_Error FLA_Sylv_hn_blk_var13 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C11 = sylv( A11', B11, C11 -/+ C10 * B01 );
73 FLA_Cntl_sub_gemm1( cntl ) );
74
76 isgn, A11, B11, C11, scale,
77 FLA_Cntl_sub_sylv1( cntl ) );
78
79 // C21 = sylv( A22', B11, C21 - A12' * C11 -/+ C20 * B01 );
82 FLA_Cntl_sub_gemm2( cntl ) );
83
86 FLA_Cntl_sub_gemm3( cntl ) );
87
89 isgn, A22, B11, C21, scale,
90 FLA_Cntl_sub_sylv2( cntl ) );
91
92 // C12 = sylv( A11', B22, C12 -/+ C10 * B02 -/+ C11 * B12 );
95 FLA_Cntl_sub_gemm4( cntl ) );
96
99 FLA_Cntl_sub_gemm5( cntl ) );
100
102 isgn, A11, B22, C12, scale,
103 FLA_Cntl_sub_sylv3( cntl ) );
104
105 // C22 = C22 - A12' * C12;
108 FLA_Cntl_sub_gemm6( cntl ) );
109
110 /*------------------------------------------------------------*/
111
112 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
113 A10, A11, /**/ A12,
114 /* ************** */ /* ****************** */
115 &ABL, /**/ &ABR, A20, A21, /**/ A22,
116 FLA_TL );
117
118 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
119 B10, B11, /**/ B12,
120 /* ************** */ /* ****************** */
121 &BBL, /**/ &BBR, B20, B21, /**/ B22,
122 FLA_TL );
123
124 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
125 C10, C11, /**/ C12,
126 /* ************** */ /* ****************** */
127 &CBL, /**/ &CBR, C20, C21, /**/ C22,
128 FLA_TL );
129
130 }
131
132 return FLA_SUCCESS;
133}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var14()

FLA_Error FLA_Sylv_hn_blk_var14 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C11 = sylv( A11', B11, C11 );
72 isgn, A11, B11, C11, scale,
73 FLA_Cntl_sub_sylv1( cntl ) );
74
75 // C21 = sylv( A22', B11, C21 - A12' * C11 );
78 FLA_Cntl_sub_gemm1( cntl ) );
79
81 isgn, A22, B11, C21, scale,
82 FLA_Cntl_sub_sylv2( cntl ) );
83
84 // C12 = sylv( A11', B22, C12 -/+ C11 * B12 );
87 FLA_Cntl_sub_gemm2( cntl ) );
88
90 isgn, A11, B22, C12, scale,
91 FLA_Cntl_sub_sylv3( cntl ) );
92
93 // C22 = C22 - A12' * C12 -/+ C21 * B12;
96 FLA_Cntl_sub_gemm3( cntl ) );
97
100 FLA_Cntl_sub_gemm4( cntl ) );
101
102 /*------------------------------------------------------------*/
103
104 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
105 A10, A11, /**/ A12,
106 /* ************** */ /* ****************** */
107 &ABL, /**/ &ABR, A20, A21, /**/ A22,
108 FLA_TL );
109
110 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
111 B10, B11, /**/ B12,
112 /* ************** */ /* ****************** */
113 &BBL, /**/ &BBR, B20, B21, /**/ B22,
114 FLA_TL );
115
116 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
117 C10, C11, /**/ C12,
118 /* ************** */ /* ****************** */
119 &CBL, /**/ &CBR, C20, C21, /**/ C22,
120 FLA_TL );
121
122 }
123
124 return FLA_SUCCESS;
125}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var15()

FLA_Error FLA_Sylv_hn_blk_var15 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18
19 FLA_Obj CT, C0,
20 CB, C1,
21 C2;
22
23 dim_t b;
24
25 FLA_Part_2x2( A, &ATL, &ATR,
26 &ABL, &ABR, 0, 0, FLA_TL );
27
28 FLA_Part_2x1( C, &CT,
29 &CB, 0, FLA_TOP );
30
31 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
32
34
35 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
36 /* ************* */ /* ******************** */
37 &A10, /**/ &A11, &A12,
38 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
39 b, b, FLA_BR );
40
42 /* ** */ /* ** */
43 &C1,
44 CB, &C2, b, FLA_BOTTOM );
45
46 // Loop Invariant:
47 // CT = sylv( ATL', B, CT )
48 // CB = CB
49
50 /*------------------------------------------------------------*/
51
52 // C1 = sylv( A11', B, C1 - A01' * C0 );
55 FLA_Cntl_sub_gemm1( cntl ) );
56
58 isgn, A11, B, C1, scale,
59 FLA_Cntl_sub_sylv1( cntl ) );
60
61 /*------------------------------------------------------------*/
62
63 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
64 A10, A11, /**/ A12,
65 /* ************** */ /* ****************** */
66 &ABL, /**/ &ABR, A20, A21, /**/ A22,
67 FLA_TL );
68
70 C1,
71 /* ** */ /* ** */
72 &CB, C2, FLA_TOP );
73
74 }
75
76 return FLA_SUCCESS;
77}
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76

References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var16()

FLA_Error FLA_Sylv_hn_blk_var16 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj CT, C0,
22 CB, C1,
23 C2;
24
25 dim_t b;
26
27 FLA_Part_2x2( A, &ATL, &ATR,
28 &ABL, &ABR, 0, 0, FLA_TL );
29
30 FLA_Part_2x1( C, &CT,
31 &CB, 0, FLA_TOP );
32
33 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
34
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38 /* ************* */ /* ******************** */
39 &A10, /**/ &A11, &A12,
40 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41 b, b, FLA_BR );
42
44 /* ** */ /* ** */
45 &C1,
46 CB, &C2, b, FLA_BOTTOM );
47
48 // Loop Invariant:
49 // CT = sylv( ATL', B, CT )
50 // CB = CB - ATR' * sylv( ATL', B, CT )
51
52 /*------------------------------------------------------------*/
53
54 // C1 = sylv( A11', B, C1 );
56 isgn, A11, B, C1, scale,
57 FLA_Cntl_sub_sylv1( cntl ) );
58
59 // C2 = C2 - A12' * C1;
62 FLA_Cntl_sub_gemm1( cntl ) );
63
64 /*------------------------------------------------------------*/
65
66 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
67 A10, A11, /**/ A12,
68 /* ************** */ /* ****************** */
69 &ABL, /**/ &ABR, A20, A21, /**/ A22,
70 FLA_TL );
71
73 C1,
74 /* ** */ /* ** */
75 &CB, C2, FLA_TOP );
76
77 }
78
79 return FLA_SUCCESS;
80}

References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var17()

FLA_Error FLA_Sylv_hn_blk_var17 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
14{
15 FLA_Obj BTL, BTR, B00, B01, B02,
16 BBL, BBR, B10, B11, B12,
17 B20, B21, B22;
18
19 FLA_Obj CL, CR, C0, C1, C2;
20
21 dim_t b;
22
23 FLA_Part_2x2( B, &BTL, &BTR,
24 &BBL, &BBR, 0, 0, FLA_TL );
25
26 FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );
27
28 while ( FLA_Obj_length( BTL ) < FLA_Obj_length( B ) ){
29
31
32 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
33 /* ************* */ /* ******************** */
34 &B10, /**/ &B11, &B12,
35 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
36 b, b, FLA_BR );
37
38 FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2,
39 b, FLA_RIGHT );
40
41 // Loop Invariant:
42 // CL =
43 // CR =
44
45 /*------------------------------------------------------------*/
46
47 // C1 = sylv( A', B11, C1 -/+ C0 * B01 );
50 FLA_Cntl_sub_gemm1( cntl ) );
51
53 isgn, A, B11, C1, scale,
54 FLA_Cntl_sub_sylv1( cntl ) );
55
56 /*------------------------------------------------------------*/
57
58 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
59 B10, B11, /**/ B12,
60 /* ************** */ /* ****************** */
61 &BBL, /**/ &BBR, B20, B21, /**/ B22,
62 FLA_TL );
63
64 FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2,
65 FLA_LEFT );
66 }
67
68 return FLA_SUCCESS;
69}
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var18()

FLA_Error FLA_Sylv_hn_blk_var18 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
14{
15 FLA_Obj BTL, BTR, B00, B01, B02,
16 BBL, BBR, B10, B11, B12,
17 B20, B21, B22;
18
19 FLA_Obj CL, CR, C0, C1, C2;
20
21 dim_t b;
22
23 FLA_Part_2x2( B, &BTL, &BTR,
24 &BBL, &BBR, 0, 0, FLA_TL );
25
26 FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );
27
28 while ( FLA_Obj_length( BTL ) < FLA_Obj_length( B ) ){
29
31
32 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
33 /* ************* */ /* ******************** */
34 &B10, /**/ &B11, &B12,
35 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
36 b, b, FLA_BR );
37
38 FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2,
39 b, FLA_RIGHT );
40
41 // Loop Invariant:
42 // CL =
43 // CR =
44
45 /*------------------------------------------------------------*/
46
47 // C1 = sylv( A', B11, C1 );
49 isgn, A, B11, C1, scale,
50 FLA_Cntl_sub_sylv1( cntl ) );
51
52 // C2 = C2 -/+ C1 * B12;
55 FLA_Cntl_sub_gemm1( cntl ) );
56
57 /*------------------------------------------------------------*/
58
59 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
60 B10, B11, /**/ B12,
61 /* ************** */ /* ****************** */
62 &BBL, /**/ &BBR, B20, B21, /**/ B22,
63 FLA_TL );
64
65 FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2,
66 FLA_LEFT );
67 }
68
69 return FLA_SUCCESS;
70}

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var2()

FLA_Error FLA_Sylv_hn_blk_var2 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL = sylv( ATL', BTL, CTL )
64 // CTR = CTR
65 // CBL = CBL - ATR' * sylv( ATL', BTL, CTL )
66 // CBR = CBR
67
68 /*------------------------------------------------------------*/
69
70 // C10 = sylv( A11', B00, C10 );
72 isgn, A11, B00, C10, scale,
73 FLA_Cntl_sub_sylv1( cntl ) );
74
75 // C20 = C20 - A12' * C10;
78 FLA_Cntl_sub_gemm1( cntl ) );
79
80 // C01 = sylv( A00', B11, C01 -/+ C00 * B01 );
83 FLA_Cntl_sub_gemm2( cntl ) );
84
86 isgn, A00, B11, C01, scale,
87 FLA_Cntl_sub_sylv2( cntl ) );
88
89 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
92 FLA_Cntl_sub_gemm3( cntl ) );
93
96 FLA_Cntl_sub_gemm4( cntl ) );
97
99 isgn, A11, B11, C11, scale,
100 FLA_Cntl_sub_sylv3( cntl ) );
101
102 // C21 = C21 - A12' * C11 - A02' * C01;
105 FLA_Cntl_sub_gemm5( cntl ) );
106
109 FLA_Cntl_sub_gemm6( cntl ) );
110
111 /*------------------------------------------------------------*/
112
113 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
114 A10, A11, /**/ A12,
115 /* ************** */ /* ****************** */
116 &ABL, /**/ &ABR, A20, A21, /**/ A22,
117 FLA_TL );
118
119 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
120 B10, B11, /**/ B12,
121 /* ************** */ /* ****************** */
122 &BBL, /**/ &BBR, B20, B21, /**/ B22,
123 FLA_TL );
124
125 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
126 C10, C11, /**/ C12,
127 /* ************** */ /* ****************** */
128 &CBL, /**/ &CBR, C20, C21, /**/ C22,
129 FLA_TL );
130
131 }
132
133 return FLA_SUCCESS;
134}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var3()

FLA_Error FLA_Sylv_hn_blk_var3 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl 
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL = sylv( ATL', BTL, CTL )
64 // CTR = CTR
65 // CBL = sylv( ABR', BTL, CBL - ATR' * sylv( ATL', BTL, CTL ) )
66 // CBR = CBR
67
68 /*------------------------------------------------------------*/
69
70 // C01 = sylv( A00', B11, C01 -/+ C00 * B01 );
73 FLA_Cntl_sub_gemm1( cntl ) );
74
76 isgn, A00, B11, C01, scale,
77 FLA_Cntl_sub_sylv1( cntl ) );
78
79 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
82 FLA_Cntl_sub_gemm2( cntl ) );
83
86 FLA_Cntl_sub_gemm3( cntl ) );
87
89 isgn, A11, B11, C11, scale,
90 FLA_Cntl_sub_sylv2( cntl ) );
91
92 // C21 = sylv( A22', B11, C21 - A12' * C11 - A02' * C01 -/+ C20 * B01 );
95 FLA_Cntl_sub_gemm4( cntl ) );
96
99 FLA_Cntl_sub_gemm5( cntl ) );
100
103 FLA_Cntl_sub_gemm6( cntl ) );
104
106 isgn, A22, B11, C21, scale,
107 FLA_Cntl_sub_sylv3( cntl ) );
108
109 /*------------------------------------------------------------*/
110
111 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112 A10, A11, /**/ A12,
113 /* ************** */ /* ****************** */
114 &ABL, /**/ &ABR, A20, A21, /**/ A22,
115 FLA_TL );
116
117 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
118 B10, B11, /**/ B12,
119 /* ************** */ /* ****************** */
120 &BBL, /**/ &BBR, B20, B21, /**/ B22,
121 FLA_TL );
122
123 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
124 C10, C11, /**/ C12,
125 /* ************** */ /* ****************** */
126 &CBL, /**/ &CBR, C20, C21, /**/ C22,
127 FLA_TL );
128
129 }
130
131 return FLA_SUCCESS;
132}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var4()

FLA_Error FLA_Sylv_hn_blk_var4 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl 
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C10 = sylv( A11', B00, C10 - A01' * C00 );
73 FLA_Cntl_sub_gemm1( cntl ) );
74
76 isgn, A11, B00, C10, scale,
77 FLA_Cntl_sub_sylv1( cntl ) );
78
79 // C01 = sylv( A00', B11, C01 );
81 isgn, A00, B11, C01, scale,
82 FLA_Cntl_sub_sylv2( cntl ) );
83
84 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
87 FLA_Cntl_sub_gemm2( cntl ) );
88
91 FLA_Cntl_sub_gemm3( cntl ) );
92
94 isgn, A11, B11, C11, scale,
95 FLA_Cntl_sub_sylv3( cntl ) );
96
97 // C12 = C12 -/+ C10 * B02 -/+ C11 * B12;
100 FLA_Cntl_sub_gemm4( cntl ) );
101
104 FLA_Cntl_sub_gemm5( cntl ) );
105
106 // C02 = C02 -/+ C01 * B12;
109 FLA_Cntl_sub_gemm6( cntl ) );
110
111 /*------------------------------------------------------------*/
112
113 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
114 A10, A11, /**/ A12,
115 /* ************** */ /* ****************** */
116 &ABL, /**/ &ABR, A20, A21, /**/ A22,
117 FLA_TL );
118
119 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
120 B10, B11, /**/ B12,
121 /* ************** */ /* ****************** */
122 &BBL, /**/ &BBR, B20, B21, /**/ B22,
123 FLA_TL );
124
125 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
126 C10, C11, /**/ C12,
127 /* ************** */ /* ****************** */
128 &CBL, /**/ &CBR, C20, C21, /**/ C22,
129 FLA_TL );
130
131 }
132
133 return FLA_SUCCESS;
134}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var5()

FLA_Error FLA_Sylv_hn_blk_var5 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl 
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C10 = sylv( A11', B00, C10 - A01' * C00 );
73 FLA_Cntl_sub_gemm1( cntl ) );
74
76 isgn, A11, B00, C10, scale,
77 FLA_Cntl_sub_sylv1( cntl ) );
78
79 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
82 FLA_Cntl_sub_gemm2( cntl ) );
83
86 FLA_Cntl_sub_gemm3( cntl ) );
87
89 isgn, A11, B11, C11, scale,
90 FLA_Cntl_sub_sylv2( cntl ) );
91
92 // C12 = sylv( A11', B22, C12 - A01' * C02 -/+ C10 * B02 -/+ C11 * B12 );
95 FLA_Cntl_sub_gemm4( cntl ) );
96
99 FLA_Cntl_sub_gemm5( cntl ) );
100
103 FLA_Cntl_sub_gemm6( cntl ) );
104
106 isgn, A11, B22, C12, scale,
107 FLA_Cntl_sub_sylv3( cntl ) );
108
109 /*------------------------------------------------------------*/
110
111 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112 A10, A11, /**/ A12,
113 /* ************** */ /* ****************** */
114 &ABL, /**/ &ABR, A20, A21, /**/ A22,
115 FLA_TL );
116
117 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
118 B10, B11, /**/ B12,
119 /* ************** */ /* ****************** */
120 &BBL, /**/ &BBR, B20, B21, /**/ B22,
121 FLA_TL );
122
123 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
124 C10, C11, /**/ C12,
125 /* ************** */ /* ****************** */
126 &CBL, /**/ &CBR, C20, C21, /**/ C22,
127 FLA_TL );
128
129 }
130
131 return FLA_SUCCESS;
132}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var6()

FLA_Error FLA_Sylv_hn_blk_var6 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl 
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C10 = sylv( A11', B00, C10 );
72 isgn, A11, B00, C10, scale,
73 FLA_Cntl_sub_sylv1( cntl ) );
74
75 // C20 = C20 - A12' * C10;
78 FLA_Cntl_sub_gemm1( cntl ) );
79
80 // C01 = sylv( A00', B11, C01 );
82 isgn, A00, B11, C01, scale,
83 FLA_Cntl_sub_sylv2( cntl ) );
84
85 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
88 FLA_Cntl_sub_gemm2( cntl ) );
89
92 FLA_Cntl_sub_gemm3( cntl ) );
93
95 isgn, A11, B11, C11, scale,
96 FLA_Cntl_sub_sylv3( cntl ) );
97
98 // C21 = C21 - A12' * C11 - A02' * C01;
101 FLA_Cntl_sub_gemm4( cntl ) );
102
105 FLA_Cntl_sub_gemm5( cntl ) );
106
107 // C12 = C12 -/+ C10 * B02 -/+ C11 * B12;
110 FLA_Cntl_sub_gemm6( cntl ) );
111
114 FLA_Cntl_sub_gemm7( cntl ) );
115
116 // C02 = C02 -/+ C01 * B12;
119 FLA_Cntl_sub_gemm8( cntl ) );
120
121 /*------------------------------------------------------------*/
122
123 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
124 A10, A11, /**/ A12,
125 /* ************** */ /* ****************** */
126 &ABL, /**/ &ABR, A20, A21, /**/ A22,
127 FLA_TL );
128
129 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
130 B10, B11, /**/ B12,
131 /* ************** */ /* ****************** */
132 &BBL, /**/ &BBR, B20, B21, /**/ B22,
133 FLA_TL );
134
135 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
136 C10, C11, /**/ C12,
137 /* ************** */ /* ****************** */
138 &CBL, /**/ &CBR, C20, C21, /**/ C22,
139 FLA_TL );
140
141 }
142
143 return FLA_SUCCESS;
144}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var7()

FLA_Error FLA_Sylv_hn_blk_var7 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl 
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C01 = sylv( A00', B11, C01 );
72 isgn, A00, B11, C01, scale,
73 FLA_Cntl_sub_sylv1( cntl ) );
74
75 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
78 FLA_Cntl_sub_gemm1( cntl ) );
79
82 FLA_Cntl_sub_gemm2( cntl ) );
83
85 isgn, A11, B11, C11, scale,
86 FLA_Cntl_sub_sylv2( cntl ) );
87
88 // C21 = sylv( A22', B11, C21 - A12' * C11 - A02' * C01 -/+ C20 * B01 );
91 FLA_Cntl_sub_gemm3( cntl ) );
92
95 FLA_Cntl_sub_gemm4( cntl ) );
96
99 FLA_Cntl_sub_gemm5( cntl ) );
100
102 isgn, A22, B11, C21, scale,
103 FLA_Cntl_sub_sylv3( cntl ) );
104
105 // C12 = C12 -/+ C10 * B02 -/+ C11 * B12;
108 FLA_Cntl_sub_gemm6( cntl ) );
109
112 FLA_Cntl_sub_gemm7( cntl ) );
113
114 // C02 = C02 -/+ C01 * B12;
117 FLA_Cntl_sub_gemm8( cntl ) );
118
119 /*------------------------------------------------------------*/
120
121 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
122 A10, A11, /**/ A12,
123 /* ************** */ /* ****************** */
124 &ABL, /**/ &ABR, A20, A21, /**/ A22,
125 FLA_TL );
126
127 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
128 B10, B11, /**/ B12,
129 /* ************** */ /* ****************** */
130 &BBL, /**/ &BBR, B20, B21, /**/ B22,
131 FLA_TL );
132
133 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
134 C10, C11, /**/ C12,
135 /* ************** */ /* ****************** */
136 &CBL, /**/ &CBR, C20, C21, /**/ C22,
137 FLA_TL );
138
139 }
140
141 return FLA_SUCCESS;
142}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var8()

FLA_Error FLA_Sylv_hn_blk_var8 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl 
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C10 = sylv( A11', B00, C10 );
72 isgn, A11, B00, C10, scale,
73 FLA_Cntl_sub_sylv1( cntl ) );
74
75 // C20 = C20 - A12' * C10;
78 FLA_Cntl_sub_gemm1( cntl ) );
79
80 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
83 FLA_Cntl_sub_gemm2( cntl ) );
84
87 FLA_Cntl_sub_gemm3( cntl ) );
88
90 isgn, A11, B11, C11, scale,
91 FLA_Cntl_sub_sylv2( cntl ) );
92
93 // C21 = C21 - A12' * C11 - A02' * C01;
96 FLA_Cntl_sub_gemm4( cntl ) );
97
100 FLA_Cntl_sub_gemm5( cntl ) );
101
102 // C12 = sylv( A11', B22, C12 - A01' * C02 -/+ C10 * B02 -/+ C11 * B12 );
105 FLA_Cntl_sub_gemm6( cntl ) );
106
109 FLA_Cntl_sub_gemm7( cntl ) );
110
113 FLA_Cntl_sub_gemm8( cntl ) );
114
116 isgn, A11, B22, C12, scale,
117 FLA_Cntl_sub_sylv3( cntl ) );
118
119 /*------------------------------------------------------------*/
120
121 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
122 A10, A11, /**/ A12,
123 /* ************** */ /* ****************** */
124 &ABL, /**/ &ABR, A20, A21, /**/ A22,
125 FLA_TL );
126
127 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
128 B10, B11, /**/ B12,
129 /* ************** */ /* ****************** */
130 &BBL, /**/ &BBR, B20, B21, /**/ B22,
131 FLA_TL );
132
133 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
134 C10, C11, /**/ C12,
135 /* ************** */ /* ****************** */
136 &CBL, /**/ &CBR, C20, C21, /**/ C22,
137 FLA_TL );
138
139 }
140
141 return FLA_SUCCESS;
142}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_blk_var9()

FLA_Error FLA_Sylv_hn_blk_var9 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl 
)
16{
17 FLA_Obj ATL, ATR, A00, A01, A02,
18 ABL, ABR, A10, A11, A12,
19 A20, A21, A22;
20
21 FLA_Obj BTL, BTR, B00, B01, B02,
22 BBL, BBR, B10, B11, B12,
23 B20, B21, B22;
24
25 FLA_Obj CTL, CTR, C00, C01, C02,
26 CBL, CBR, C10, C11, C12,
27 C20, C21, C22;
28
29 dim_t b;
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x2( B, &BTL, &BTR,
35 &BBL, &BBR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( C, &CTL, &CTR,
38 &CBL, &CBR, 0, 0, FLA_TL );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41
43
44 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45 /* ************* */ /* ******************** */
46 &A10, /**/ &A11, &A12,
47 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48 b, b, FLA_BR );
49
50 FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
51 /* ************* */ /* ******************** */
52 &B10, /**/ &B11, &B12,
53 BBL, /**/ BBR, &B20, /**/ &B21, &B22,
54 b, b, FLA_BR );
55
56 FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
57 /* ************* */ /* ******************** */
58 &C10, /**/ &C11, &C12,
59 CBL, /**/ CBR, &C20, /**/ &C21, &C22,
60 b, b, FLA_BR );
61
62 // Loop Invariant:
63 // CTL =
64 // CTR =
65 // CBL =
66 // CBR =
67
68 /*------------------------------------------------------------*/
69
70 // C11 = sylv( A11', B11, C11 - A01' * C01 -/+ C10 * B01 );
73 FLA_Cntl_sub_gemm1( cntl ) );
74
77 FLA_Cntl_sub_gemm2( cntl ) );
78
80 isgn, A11, B11, C11, scale,
81 FLA_Cntl_sub_sylv1( cntl ) );
82
83 // C21 = sylv( A22', B11, C21 - A12' * C11 - A02' * C01 -/+ C20 * B01 );
86 FLA_Cntl_sub_gemm3( cntl ) );
87
90 FLA_Cntl_sub_gemm4( cntl ) );
91
94 FLA_Cntl_sub_gemm5( cntl ) );
95
97 isgn, A22, B11, C21, scale,
98 FLA_Cntl_sub_sylv2( cntl ) );
99
100 // C12 = sylv( A11', B22, C12 - A01' * C02 -/+ C10 * B02 -/+ C11 * B12 );
103 FLA_Cntl_sub_gemm6( cntl ) );
104
107 FLA_Cntl_sub_gemm7( cntl ) );
108
111 FLA_Cntl_sub_gemm8( cntl ) );
112
114 isgn, A11, B22, C12, scale,
115 FLA_Cntl_sub_sylv3( cntl ) );
116
117 /*------------------------------------------------------------*/
118
119 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
120 A10, A11, /**/ A12,
121 /* ************** */ /* ****************** */
122 &ABL, /**/ &ABR, A20, A21, /**/ A22,
123 FLA_TL );
124
125 FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
126 B10, B11, /**/ B12,
127 /* ************** */ /* ****************** */
128 &BBL, /**/ &BBR, B20, B21, /**/ B22,
129 FLA_TL );
130
131 FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
132 C10, C11, /**/ C12,
133 /* ************** */ /* ****************** */
134 &CBL, /**/ &CBR, C20, C21, /**/ C22,
135 FLA_TL );
136
137 }
138
139 return FLA_SUCCESS;
140}

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Sylv_internal(), and i.

Referenced by FLA_Sylv_hn().

◆ FLA_Sylv_hn_opc_var1()

FLA_Error FLA_Sylv_hn_opc_var1 ( float  sgn,
int  m_C,
int  n_C,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_B,
int  rs_B,
int  cs_B,
scomplex *buff_C,
int  rs_C,
int  cs_C,
scomplex *buff_scale,
int *info 
)
262{
263 int l, k;
264
265 for ( l = 0; l < n_C; l++ )
266 {
267 for ( k = 0; k < m_C; k++ )
268 {
269 scomplex* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
270 scomplex* b01 = buff_B + (l )*cs_B + (0 )*rs_B;
271 scomplex* c10t = buff_C + (0 )*cs_C + (k )*rs_C;
272 scomplex* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
273 scomplex* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
274 scomplex* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
275 scomplex* ckl = buff_C + (l )*cs_C + (k )*rs_C;
281
282 int m_behind = k;
283 int n_behind = l;
284
285 /*------------------------------------------------------------*/
286
288 m_behind,
289 a01, rs_A,
290 c01, rs_C,
291 &suml );
292
294 n_behind,
295 c10t, cs_C,
296 b01, rs_B,
297 &sumr );
298
299 vec.real = ckl->real - ( suml.real + sgn * sumr.real );
300 vec.imag = ckl->imag - ( suml.imag + sgn * sumr.imag );
301
302 a11.real = alpha11->real + sgn * beta11->real;
303 a11.imag = -alpha11->imag + sgn * beta11->imag;
304
305 bl1_cdiv3( &vec, &a11, &x11 );
306
307 *ckl = x11;
308
309 /*------------------------------------------------------------*/
310
311 }
312 }
313
314 return FLA_SUCCESS;
315}
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_cdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and i.

Referenced by FLA_Sylv_hn_opt_var1().

◆ FLA_Sylv_hn_opd_var1()

FLA_Error FLA_Sylv_hn_opd_var1 ( double  sgn,
int  m_C,
int  n_C,
double *buff_A,
int  rs_A,
int  cs_A,
double *buff_B,
int  rs_B,
int  cs_B,
double *buff_C,
int  rs_C,
int  cs_C,
double *buff_scale,
int *info 
)
199{
200 int l, k;
201
202 for ( l = 0; l < n_C; l++ )
203 {
204 for ( k = 0; k < m_C; k++ )
205 {
206 double* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
207 double* b01 = buff_B + (l )*cs_B + (0 )*rs_B;
208 double* c10t = buff_C + (0 )*cs_C + (k )*rs_C;
209 double* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
210 double* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
211 double* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
212 double* ckl = buff_C + (l )*cs_C + (k )*rs_C;
213 double suml;
214 double sumr;
215 double vec;
216 double a11;
217 double x11;
218
219 int m_behind = k;
220 int n_behind = l;
221
222 /*------------------------------------------------------------*/
223
225 m_behind,
226 a01, rs_A,
227 c01, rs_C,
228 &suml );
229
231 n_behind,
232 c10t, cs_C,
233 b01, rs_B,
234 &sumr );
235
236 vec = (*ckl) - ( suml + sgn * sumr );
237
238 a11 = (*alpha11) + sgn * (*beta11);
239
240 bl1_ddiv3( &vec, &a11, &x11 );
241
242 *ckl = x11;
243
244 /*------------------------------------------------------------*/
245
246 }
247 }
248
249 return FLA_SUCCESS;
250}
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26

References bl1_ddot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and i.

Referenced by FLA_Sylv_hn_opt_var1().

◆ FLA_Sylv_hn_ops_var1()

FLA_Error FLA_Sylv_hn_ops_var1 ( float  sgn,
int  m_C,
int  n_C,
float *buff_A,
int  rs_A,
int  cs_A,
float *buff_B,
int  rs_B,
int  cs_B,
float *buff_C,
int  rs_C,
int  cs_C,
float *buff_scale,
int *info 
)
136{
137 int l, k;
138
139 for ( l = 0; l < n_C; l++ )
140 {
141 for ( k = 0; k < m_C; k++ )
142 {
143 float* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
144 float* b01 = buff_B + (l )*cs_B + (0 )*rs_B;
145 float* c10t = buff_C + (0 )*cs_C + (k )*rs_C;
146 float* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
147 float* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
148 float* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
149 float* ckl = buff_C + (l )*cs_C + (k )*rs_C;
150 float suml;
151 float sumr;
152 float vec;
153 float a11;
154 float x11;
155
156 int m_behind = k;
157 int n_behind = l;
158
159 /*------------------------------------------------------------*/
160
162 m_behind,
163 a01, rs_A,
164 c01, rs_C,
165 &suml );
166
168 n_behind,
169 c10t, cs_C,
170 b01, rs_B,
171 &sumr );
172
173 vec = (*ckl) - ( suml + sgn * sumr );
174
175 a11 = (*alpha11) + sgn * (*beta11);
176
177 bl1_sdiv3( &vec, &a11, &x11 );
178
179 *ckl = x11;
180
181 /*------------------------------------------------------------*/
182
183 }
184 }
185
186 return FLA_SUCCESS;
187}
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13

References bl1_sdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and i.

Referenced by FLA_Sylv_hn_opt_var1().

◆ FLA_Sylv_hn_opt_var1()

FLA_Error FLA_Sylv_hn_opt_var1 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
14{
15 FLA_Datatype datatype;
16 int m_C, n_C;
17 int rs_A, cs_A;
18 int rs_B, cs_B;
19 int rs_C, cs_C;
20 int info;
21
22 datatype = FLA_Obj_datatype( A );
23
26
29
30 m_C = FLA_Obj_length( C );
31 n_C = FLA_Obj_width( C );
34
35
36 switch ( datatype )
37 {
38 case FLA_FLOAT:
39 {
40 int* buff_isgn = FLA_INT_PTR( isgn );
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_B = FLA_FLOAT_PTR( B );
43 float* buff_C = FLA_FLOAT_PTR( C );
44 float* buff_scale = FLA_FLOAT_PTR( scale );
45 float sgn = ( float ) *buff_isgn;
46
48 m_C,
49 n_C,
54 &info );
55
56 break;
57 }
58
59 case FLA_DOUBLE:
60 {
61 int* buff_isgn = FLA_INT_PTR( isgn );
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_B = FLA_DOUBLE_PTR( B );
64 double* buff_C = FLA_DOUBLE_PTR( C );
65 double* buff_scale = FLA_DOUBLE_PTR( scale );
66 double sgn = ( double ) *buff_isgn;
67
69 m_C,
70 n_C,
75 &info );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
82 int* buff_isgn = FLA_INT_PTR( isgn );
87 float sgn = ( float ) *buff_isgn;
88
90 m_C,
91 n_C,
96 &info );
97
98 break;
99 }
100
102 {
103 int* buff_isgn = FLA_INT_PTR( isgn );
108 double sgn = ( double ) *buff_isgn;
109
111 m_C,
112 n_C,
113 buff_A, rs_A, cs_A,
114 buff_B, rs_B, cs_B,
115 buff_C, rs_C, cs_C,
117 &info );
118
119 break;
120 }
121 }
122
123 return FLA_SUCCESS;
124}
FLA_Error FLA_Sylv_hn_opd_var1(double sgn, int m_C, int n_C, double *buff_A, int rs_A, int cs_A, double *buff_B, int rs_B, int cs_B, double *buff_C, int rs_C, int cs_C, double *buff_scale, int *info)
Definition FLA_Sylv_hn_opt_var1.c:191
FLA_Error FLA_Sylv_hn_opz_var1(double sgn, int m_C, int n_C, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_B, int rs_B, int cs_B, dcomplex *buff_C, int rs_C, int cs_C, dcomplex *buff_scale, int *info)
Definition FLA_Sylv_hn_opt_var1.c:319
FLA_Error FLA_Sylv_hn_ops_var1(float sgn, int m_C, int n_C, float *buff_A, int rs_A, int cs_A, float *buff_B, int rs_B, int cs_B, float *buff_C, int rs_C, int cs_C, float *buff_scale, int *info)
Definition FLA_Sylv_hn_opt_var1.c:128
FLA_Error FLA_Sylv_hn_opc_var1(float sgn, int m_C, int n_C, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_B, int rs_B, int cs_B, scomplex *buff_C, int rs_C, int cs_C, scomplex *buff_scale, int *info)
Definition FLA_Sylv_hn_opt_var1.c:254
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Sylv_hn_opc_var1(), FLA_Sylv_hn_opd_var1(), FLA_Sylv_hn_ops_var1(), FLA_Sylv_hn_opz_var1(), and i.

Referenced by FLA_Sylv_hn(), FLA_Sylv_hn_opt_var10(), FLA_Sylv_hn_opt_var11(), FLA_Sylv_hn_opt_var12(), FLA_Sylv_hn_opt_var13(), FLA_Sylv_hn_opt_var14(), FLA_Sylv_hn_opt_var15(), FLA_Sylv_hn_opt_var16(), FLA_Sylv_hn_opt_var17(), FLA_Sylv_hn_opt_var18(), FLA_Sylv_hn_opt_var2(), FLA_Sylv_hn_opt_var3(), FLA_Sylv_hn_opt_var4(), FLA_Sylv_hn_opt_var5(), FLA_Sylv_hn_opt_var6(), FLA_Sylv_hn_opt_var7(), FLA_Sylv_hn_opt_var8(), and FLA_Sylv_hn_opt_var9().

◆ FLA_Sylv_hn_opt_var10()

FLA_Error FLA_Sylv_hn_opt_var10 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}
FLA_Error FLA_Sylv_hn_opt_var1(FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
Definition FLA_Sylv_hn_opt_var1.c:13

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var11()

FLA_Error FLA_Sylv_hn_opt_var11 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var12()

FLA_Error FLA_Sylv_hn_opt_var12 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var13()

FLA_Error FLA_Sylv_hn_opt_var13 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var14()

FLA_Error FLA_Sylv_hn_opt_var14 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var15()

FLA_Error FLA_Sylv_hn_opt_var15 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var16()

FLA_Error FLA_Sylv_hn_opt_var16 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var17()

FLA_Error FLA_Sylv_hn_opt_var17 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var18()

FLA_Error FLA_Sylv_hn_opt_var18 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var2()

FLA_Error FLA_Sylv_hn_opt_var2 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var3()

FLA_Error FLA_Sylv_hn_opt_var3 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var4()

FLA_Error FLA_Sylv_hn_opt_var4 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var5()

FLA_Error FLA_Sylv_hn_opt_var5 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var6()

FLA_Error FLA_Sylv_hn_opt_var6 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var7()

FLA_Error FLA_Sylv_hn_opt_var7 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var8()

FLA_Error FLA_Sylv_hn_opt_var8 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opt_var9()

FLA_Error FLA_Sylv_hn_opt_var9 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16{
17 return FLA_Sylv_hn_opt_var1( isgn, A, B, C, scale );
18}

References FLA_Sylv_hn_opt_var1(), and i.

◆ FLA_Sylv_hn_opz_var1()

FLA_Error FLA_Sylv_hn_opz_var1 ( double  sgn,
int  m_C,
int  n_C,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_B,
int  rs_B,
int  cs_B,
dcomplex *  buff_C,
int  rs_C,
int  cs_C,
dcomplex *  buff_scale,
int *  info 
)
327{
328 int l, k;
329
330 for ( l = 0; l < n_C; l++ )
331 {
332 for ( k = 0; k < m_C; k++ )
333 {
334 dcomplex* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
335 dcomplex* b01 = buff_B + (l )*cs_B + (0 )*rs_B;
336 dcomplex* c10t = buff_C + (0 )*cs_C + (k )*rs_C;
337 dcomplex* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
338 dcomplex* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
339 dcomplex* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
340 dcomplex* ckl = buff_C + (l )*cs_C + (k )*rs_C;
341 dcomplex suml;
342 dcomplex sumr;
343 dcomplex vec;
344 dcomplex a11;
345 dcomplex x11;
346
347 int m_behind = k;
348 int n_behind = l;
349
350 /*------------------------------------------------------------*/
351
352 bl1_zdot( BLIS1_CONJUGATE,
353 m_behind,
354 a01, rs_A,
355 c01, rs_C,
356 &suml );
357
358 bl1_zdot( BLIS1_NO_CONJUGATE,
359 n_behind,
360 c10t, cs_C,
361 b01, rs_B,
362 &sumr );
363
364 vec.real = ckl->real - ( suml.real + sgn * sumr.real );
365 vec.imag = ckl->imag - ( suml.imag + sgn * sumr.imag );
366
367 a11.real = alpha11->real + sgn * beta11->real;
368 a11.imag = -alpha11->imag + sgn * beta11->imag;
369
370 bl1_zdiv3( &vec, &a11, &x11 );
371
372 *ckl = x11;
373
374 /*------------------------------------------------------------*/
375
376 }
377 }
378
379 return FLA_SUCCESS;
380}
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65

References bl1_zdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and i.

Referenced by FLA_Sylv_hn_opt_var1().