libflame revision_anchor
Functions
FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj A, FLA_Obj x, FLA_Obj v, FLA_Obj w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Gerc2_Ahx_Ax_opc_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex * buff_alpha,
scomplex * buff_u,
int  inc_u,
scomplex * buff_y,
int  inc_y,
scomplex * buff_z,
int  inc_z,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_x,
int  inc_x,
scomplex * buff_v,
int  inc_v,
scomplex * buff_w,
int  inc_w 
)
337{
339 int i;
340
341 bl1_csetv( m_A,
342 buff_0,
343 buff_w, inc_w );
344
345 for ( i = 0; i < n_A; ++i )
346 {
347 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
348 scomplex* nu1 = buff_v + (i )*inc_v;
349 scomplex* x = buff_x;
350 scomplex* chi1 = buff_x + (i )*inc_x;
351 scomplex* psi1 = buff_y + (i )*inc_y;
352 scomplex* u = buff_u;
354 scomplex* w = buff_w;
355 scomplex* z = buff_z;
361
362 /*------------------------------------------------------------*/
363
366
369
370 bl1_caxpyv( BLIS1_NO_CONJUGATE,
371 m_A,
372 &temp1,
373 u, inc_u,
374 a1, rs_A );
375/*
376 F77_caxpy( &m_A,
377 &temp1,
378 u, &inc_u,
379 a1, &rs_A );
380*/
381
382 bl1_caxpyv( BLIS1_NO_CONJUGATE,
383 m_A,
384 &temp2,
385 z, inc_z,
386 a1, rs_A );
387/*
388 F77_caxpy( &m_A,
389 &temp2,
390 z, &inc_z,
391 a1, &rs_A );
392*/
393
394 bl1_cdot( BLIS1_CONJUGATE,
395 m_A,
396 a1, rs_A,
397 x, inc_x,
398 nu1 );
399
400 bl1_caxpyv( BLIS1_NO_CONJUGATE,
401 m_A,
402 chi1,
403 a1, rs_A,
404 w, inc_w );
405/*
406 F77_caxpy( &m_A,
407 chi1,
408 a1, &rs_A,
409 w, &inc_w );
410*/
411
412 /*------------------------------------------------------------*/
413
414 }
415
416 return FLA_SUCCESS;
417}
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
double *restrict psi1
Definition bl1_axmyv2.c:141
int i
Definition bl1_axmyv2.c:145
chi1
Definition bl1_axmyv2.c:366
double temp2
Definition bl1_axpyv2b.c:147
double temp1
Definition bl1_axpyv2b.c:146
upsilon1
Definition bl1_axpyv2bdotaxpy.c:225
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofc_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_opd_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1 ( int  m_A,
int  n_A,
double * buff_alpha,
double * buff_u,
int  inc_u,
double * buff_y,
int  inc_y,
double * buff_z,
int  inc_z,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_x,
int  inc_x,
double * buff_v,
int  inc_v,
double * buff_w,
int  inc_w 
)
258{
259 double zero = bl1_d0();
260
261 double* restrict alpha = buff_alpha;
262 double* restrict u = buff_u;
263 double* restrict z = buff_z;
264 double* restrict x = buff_x;
265 double* restrict w = buff_w;
266
267 double* restrict a1;
268 double* restrict nu1;
269 double* restrict chi1;
270 double* restrict psi1;
271 double* restrict upsilon1;
272
273 double alpha_psi1;
274 double alpha_upsilon1;
275
276 int n_run = n_A / 1;
277 //int n_left = n_A % 1;
278 int step_a1 = 1*cs_A;
279 int step_nu1 = 1*inc_v;
280 int step_chi1 = 1*inc_x;
281 int step_psi1 = 1*inc_y;
282 int step_upsilon1 = 1*inc_u;
283 int i;
284
285 bl1_dsetv( m_A,
286 &zero,
287 buff_w, inc_w );
288
289 a1 = buff_A;
290 nu1 = buff_v;
291 chi1 = buff_x;
292 psi1 = buff_y;
294
295 for ( i = 0; i < n_run; ++i )
296 {
297 /*------------------------------------------------------------*/
298
301
302 bl1_daxpyv2bdotaxpy( m_A,
303 &alpha_psi1,
304 u, inc_u,
305 &alpha_upsilon1,
306 z, inc_z,
307 a1, rs_A,
308 x, inc_x,
309 chi1,
310 nu1,
311 w, inc_w );
312
313 /*------------------------------------------------------------*/
314
315 a1 += step_a1;
316 nu1 += step_nu1;
317 chi1 += step_chi1;
318 psi1 += step_psi1;
320 }
321
322 return FLA_SUCCESS;
323}
int n_run
Definition bl1_axmyv2.c:148
void bl1_daxpyv2bdotaxpy(int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition bl1_axpyv2bdotaxpy.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39
double bl1_d0(void)
Definition bl1_constants.c:118

References bl1_d0(), bl1_daxpyv2bdotaxpy(), bl1_dsetv(), chi1, i, n_run, psi1, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofd_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_ops_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1 ( int  m_A,
int  n_A,
float * buff_alpha,
float * buff_u,
int  inc_u,
float * buff_y,
int  inc_y,
float * buff_z,
int  inc_z,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_x,
int  inc_x,
float * buff_v,
int  inc_v,
float * buff_w,
int  inc_w 
)
160{
161 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
162 int i;
163
164 bl1_ssetv( m_A,
165 buff_0,
166 buff_w, inc_w );
167
168 for ( i = 0; i < n_A; ++i )
169 {
170 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
171 float* nu1 = buff_v + (i )*inc_v;
172 float* x = buff_x;
173 float* chi1 = buff_x + (i )*inc_x;
174 float* psi1 = buff_y + (i )*inc_y;
175 float* u = buff_u;
176 float* upsilon1 = buff_u + (i )*inc_u;
177 float* w = buff_w;
178 float* z = buff_z;
179 float* alpha = buff_alpha;
180 float temp1;
181 float temp2;
182
183 /*------------------------------------------------------------*/
184
185 // bl1_scopyconj( psi1, &conj_psi1 );
186 // bl1_smult3( alpha, &conj_psi1, &temp1 );
187 temp1 = *alpha * *psi1;
188
189 // bl1_scopyconj( upsilon1, &conj_upsilon1 );
190 // bl1_smult3( alpha, &conj_upsilon1, &temp2 );
191 temp2 = *alpha * *upsilon1;
192
193 bl1_saxpyv( BLIS1_NO_CONJUGATE,
194 m_A,
195 &temp1,
196 u, inc_u,
197 a1, rs_A );
198/*
199 F77_saxpy( &m_A,
200 &temp1,
201 u, &inc_u,
202 a1, &rs_A );
203*/
204
205 bl1_saxpyv( BLIS1_NO_CONJUGATE,
206 m_A,
207 &temp2,
208 z, inc_z,
209 a1, rs_A );
210/*
211 F77_saxpy( &m_A,
212 &temp2,
213 z, &inc_z,
214 a1, &rs_A );
215*/
216
217 bl1_sdot( BLIS1_CONJUGATE,
218 m_A,
219 a1, rs_A,
220 x, inc_x,
221 nu1 );
222/*
223 *nu1 = F77_sdot( &m_A,
224 a1, &rs_A,
225 x, &inc_x );
226*/
227
228 bl1_saxpyv( BLIS1_NO_CONJUGATE,
229 m_A,
230 chi1,
231 a1, rs_A,
232 w, inc_w );
233/*
234 F77_saxpy( &m_A,
235 chi1,
236 a1, &rs_A,
237 w, &inc_w );
238*/
239 /*------------------------------------------------------------*/
240
241 }
242
243 return FLA_SUCCESS;
244}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofs_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_opt_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  A,
FLA_Obj  x,
FLA_Obj  v,
FLA_Obj  w 
)
14{
15/*
16 Effective computation:
17 A = A + alpha * ( u * y' + z * u' );
18 v = A' * x;
19 w = A * x;
20*/
21 FLA_Datatype datatype;
22 int m_A, n_A;
23 int rs_A, cs_A;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
30
33
40
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_u = FLA_FLOAT_PTR( u );
48 float* buff_y = FLA_FLOAT_PTR( y );
49 float* buff_z = FLA_FLOAT_PTR( z );
50 float* buff_x = FLA_FLOAT_PTR( x );
51 float* buff_v = FLA_FLOAT_PTR( v );
52 float* buff_w = FLA_FLOAT_PTR( w );
53 float* buff_alpha = FLA_FLOAT_PTR( alpha );
54
56 n_A,
64 buff_w, inc_w );
65
66 break;
67 }
68
69 case FLA_DOUBLE:
70 {
71 double* buff_A = FLA_DOUBLE_PTR( A );
72 double* buff_u = FLA_DOUBLE_PTR( u );
73 double* buff_y = FLA_DOUBLE_PTR( y );
74 double* buff_z = FLA_DOUBLE_PTR( z );
75 double* buff_x = FLA_DOUBLE_PTR( x );
76 double* buff_v = FLA_DOUBLE_PTR( v );
77 double* buff_w = FLA_DOUBLE_PTR( w );
78 double* buff_alpha = FLA_DOUBLE_PTR( alpha );
79
81 n_A,
89 buff_w, inc_w );
90
91 break;
92 }
93
94 case FLA_COMPLEX:
95 {
104
106 n_A,
108 buff_u, inc_u,
109 buff_y, inc_y,
110 buff_z, inc_z,
111 buff_A, rs_A, cs_A,
112 buff_x, inc_x,
113 buff_v, inc_v,
114 buff_w, inc_w );
115
116 break;
117 }
118
120 {
129
131 n_A,
133 buff_u, inc_u,
134 buff_y, inc_y,
135 buff_z, inc_z,
136 buff_A, rs_A, cs_A,
137 buff_x, inc_x,
138 buff_v, inc_v,
139 buff_w, inc_w );
140
141 break;
142 }
143 }
144
145 return FLA_SUCCESS;
146}
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:421
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:248
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:327
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:150
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Gerc2_Ahx_Ax_opz_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex * buff_alpha,
dcomplex * buff_u,
int  inc_u,
dcomplex * buff_y,
int  inc_y,
dcomplex * buff_z,
int  inc_z,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_x,
int  inc_x,
dcomplex * buff_v,
int  inc_v,
dcomplex * buff_w,
int  inc_w 
)
431{
432 dcomplex zero = bl1_z0();
433
439
445
450
451 int n_run = n_A / 1;
452 //int n_left = n_A % 1;
453 int step_a1 = 1*cs_A;
454 int step_nu1 = 1*inc_v;
455 int step_chi1 = 1*inc_x;
456 int step_psi1 = 1*inc_y;
457 int step_upsilon1 = 1*inc_u;
458 int i;
459
460 bl1_zsetv( m_A,
461 &zero,
462 buff_w, inc_w );
463
464 a1 = buff_A;
465 nu1 = buff_v;
466 chi1 = buff_x;
467 psi1 = buff_y;
469
470 for ( i = 0; i < n_run; ++i )
471 {
472 /*------------------------------------------------------------*/
473
476
479
480/*
481 bl1_zaxpyv2bdotaxpy( m_A,
482 &temp1,
483 u, inc_u,
484 &temp2,
485 z, inc_z,
486 a1, rs_A,
487 x, inc_x,
488 chi1,
489 nu1,
490 w, inc_w );
491*/
492
493 bl1_zaxpyv2b( m_A,
494 &temp1,
495 &temp2,
496 u, inc_u,
497 z, inc_z,
498 a1, rs_A );
499 bl1_zdotaxpy( m_A,
500 a1, rs_A,
501 x, inc_x,
502 chi1,
503 nu1,
504 w, inc_w );
505
506 /*------------------------------------------------------------*/
507
508 a1 += step_a1;
509 nu1 += step_nu1;
510 chi1 += step_chi1;
511 psi1 += step_psi1;
513 }
514
515 return FLA_SUCCESS;
516}
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition bl1_axpyv2b.c:210
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition bl1_dotaxpy.c:258
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotaxpy(), bl1_zsetv(), chi1, i, n_run, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofz_var3().