libflame revision_anchor
Functions
FLA_Fused_Gerc2_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Gerc2_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A)
 
FLA_Error FLA_Fused_Gerc2_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Fused_Gerc2_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Fused_Gerc2_opc_var1()

FLA_Error FLA_Fused_Gerc2_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
249{
250 int i;
251
252 for ( i = 0; i < n_A; ++i )
253 {
254 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
255 scomplex* u = buff_u;
256 scomplex* psi1 = buff_y + (i )*inc_y;
257 scomplex* z = buff_z;
258 scomplex* nu1 = buff_v + (i )*inc_v;
264
265 /*------------------------------------------------------------*/
266
269
272
273 bl1_caxpyv( BLIS1_NO_CONJUGATE,
274 m_A,
275 &temp1,
276 u, inc_u,
277 a1, rs_A );
278/*
279 F77_caxpy( &m_A,
280 &temp1,
281 u, &inc_u,
282 a1, &rs_A );
283*/
284
285 bl1_caxpyv( BLIS1_NO_CONJUGATE,
286 m_A,
287 &temp2,
288 z, inc_z,
289 a1, rs_A );
290/*
291 F77_caxpy( &m_A,
292 &temp2,
293 z, &inc_z,
294 a1, &rs_A );
295*/
296
297 /*------------------------------------------------------------*/
298
299 }
300
301 return FLA_SUCCESS;
302}
double *restrict psi1
Definition bl1_axmyv2.c:141
int i
Definition bl1_axmyv2.c:145
double temp2
Definition bl1_axpyv2b.c:147
double temp1
Definition bl1_axpyv2b.c:146
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofc_var2(), and FLA_Hess_UT_step_ofc_var3().

◆ FLA_Fused_Gerc2_opd_var1()

FLA_Error FLA_Fused_Gerc2_opd_var1 ( int  m_A,
int  n_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A 
)
201{
202 int i;
203
204 for ( i = 0; i < n_A; ++i )
205 {
206/*
207 Effective computation:
208 A = A + alpha * ( u * y' + z * v' );
209*/
210 double* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
211 double* restrict u = buff_u;
212 double* restrict psi1 = buff_y + (i )*inc_y;
213 double* restrict z = buff_z;
214 double* restrict nu1 = buff_v + (i )*inc_v;
215 double* restrict alpha = buff_alpha;
216 double alpha_conj_psi1;
217 double alpha_conj_nu1;
218
219 /*------------------------------------------------------------*/
220
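221 alpha_conj_psi1 = *alpha * *psi1;
222
223 alpha_conj_nu1 = *alpha * *nu1;
224
225 bl1_daxpyv2b( m_A,
226 &alpha_conj_psi1,
227 &alpha_conj_nu1,
228 u, inc_u,
229 z, inc_z,
230 a1, rs_A );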
231
232 /*------------------------------------------------------------*/
233
234 }
235
236 return FLA_SUCCESS;
237}
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition bl1_axpyv2b.c:31

References bl1_daxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofd_var2(), and FLA_Hess_UT_step_ofd_var3().

◆ FLA_Fused_Gerc2_ops_var1()

FLA_Error FLA_Fused_Gerc2_ops_var1 ( int  m_A,
int  n_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A 
)
138{
139 int i;
140
141 for ( i = 0; i < n_A; ++i )
142 {
143 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
144 float* u = buff_u;
145 float* psi1 = buff_y + (i )*inc_y;
146 float* z = buff_z;
147 float* nu1 = buff_v + (i )*inc_v;
148 float* alpha = buff_alpha;
149 float temp1;
150 float temp2;
151
152 /*------------------------------------------------------------*/
153
154 // bl1_smult3( alpha, psi1, &temp1 );
155 temp1 = *alpha * *psi1;
156
157 // bl1_smult3( alpha, nu1, &temp2 );
158 temp2 = *alpha * *nu1;
159
160 bl1_saxpyv( BLIS1_NO_CONJUGATE,
161 m_A,
162 &temp1,
163 u, inc_u,
164 a1, rs_A );
165/*
166 F77_saxpy( &m_A,
167 &temp1,
168 u, &inc_u,
169 a1, &rs_A );
170*/
171
172 bl1_saxpyv( BLIS1_NO_CONJUGATE,
173 m_A,
174 &temp2,
175 z, inc_z,
176 a1, rs_A );
177/*
178 F77_saxpy( &m_A,
179 &temp2,
180 z, &inc_z,
181 a1, &rs_A );
182*/
183
184 /*------------------------------------------------------------*/
185
186 }
187
188 return FLA_SUCCESS;
189}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13

References bl1_saxpyv(), BLIS1_NO_CONJUGATE, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofs_var2(), and FLA_Hess_UT_step_ofs_var3().

◆ FLA_Fused_Gerc2_opt_var1()

FLA_Error FLA_Fused_Gerc2_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A 
)
14{
15/*
16 Effective computation:
17 A = A + alpha * ( u * y' + z * v' );
18*/
19 FLA_Datatype datatype;
20 int m_A, n_A;
21 int rs_A, cs_A;
22 int inc_u, inc_y, inc_z, inc_v;
23
24 datatype = FLA_Obj_datatype( A );
25
26 m_A = FLA_Obj_length( A );
27 n_A = FLA_Obj_width( A );
28
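29 rs_A = FLA_Obj_row_stride( A );
30 cs_A = FLA_Obj_col_stride( A );
31
32 inc_u = FLA_Obj_vector_inc( u );
33 inc_y = FLA_Obj_vector_inc( y );
34 inc_z = FLA_Obj_vector_inc( z );
35 inc_v = FLA_Obj_vector_inc( v );
36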
37
38 switch ( datatype )
39 {
40 case FLA_FLOAT:
41 {
42 float* buff_A = FLA_FLOAT_PTR( A );
43 float* buff_u = FLA_FLOAT_PTR( u );
44 float* buff_y = FLA_FLOAT_PTR( y );
45 float* buff_z = FLA_FLOAT_PTR( z );
46 float* buff_v = FLA_FLOAT_PTR( v );
47 float* buff_alpha = FLA_FLOAT_PTR( alpha );
48
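49 FLA_Fused_Gerc2_ops_var1( m_A,
50 n_A,
51 buff_alpha,
52 buff_u, inc_u,
53 buff_y, inc_y,
54 buff_z, inc_z,
55 buff_v, inc_v,
56 buff_A, rs_A, cs_A );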
57
58 break;
59 }
60
61 case FLA_DOUBLE:
62 {
63 double* buff_A = FLA_DOUBLE_PTR( A );
64 double* buff_u = FLA_DOUBLE_PTR( u );
65 double* buff_y = FLA_DOUBLE_PTR( y );
66 double* buff_z = FLA_DOUBLE_PTR( z );
67 double* buff_v = FLA_DOUBLE_PTR( v );
68 double* buff_alpha = FLA_DOUBLE_PTR( alpha );
69
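70 FLA_Fused_Gerc2_opd_var1( m_A,
71 n_A,
72 buff_alpha,
73 buff_u, inc_u,
74 buff_y, inc_y,
75 buff_z, inc_z,
76 buff_v, inc_v,
77 buff_A, rs_A, cs_A );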
78
79 break;
80 }
81
82 case FLA_COMPLEX:
83 {
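84 scomplex* buff_A = FLA_COMPLEX_PTR( A );
85 scomplex* buff_u = FLA_COMPLEX_PTR( u );
86 scomplex* buff_y = FLA_COMPLEX_PTR( y );
87 scomplex* buff_z = FLA_COMPLEX_PTR( z );
88 scomplex* buff_v = FLA_COMPLEX_PTR( v );
89 scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );
90
91 FLA_Fused_Gerc2_opc_var1( m_A,
92 n_A,
93 buff_alpha,
94 buff_u, inc_u,
95 buff_y, inc_y,
96 buff_z, inc_z,
97 buff_v, inc_v,
98 buff_A, rs_A, cs_A );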
99
100 break;
101 }
102
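103 case FLA_DOUBLE_COMPLEX:
104 {
105 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
106 dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
107 dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
108 dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
109 dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
110 dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );
111
112 FLA_Fused_Gerc2_opz_var1( m_A,
113 n_A,
114 buff_alpha,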
115 buff_u, inc_u,
116 buff_y, inc_y,
117 buff_z, inc_z,
118 buff_v, inc_v,
119 buff_A, rs_A, cs_A );
120
121 break;
122 }
123 }
124
125 return FLA_SUCCESS;
126}
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:241
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Gerc2_opz_var1()

FLA_Error FLA_Fused_Gerc2_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
314{
315 int i;
316
317 for ( i = 0; i < n_A; ++i )
318 {
319 dcomplex* restrict a1 = buff_A + (i )*cs_A + (0 )*rs_A;
329
330 /*------------------------------------------------------------*/
331
334
337
341 u, inc_u,
342 z, inc_z,
343 a1, rs_A );
344
345 /*------------------------------------------------------------*/
346
347 }
348
349 return FLA_SUCCESS;
350}
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition bl1_axpyv2b.c:210

References bl1_zaxpyv2b(), i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Fused_Gerc2_opt_var1(), FLA_Hess_UT_step_ofz_var2(), and FLA_Hess_UT_step_ofz_var3().