libflame revision_anchor
Functions
FLA_Fused_Ahx_Axpy_Ax_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 (FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Ahx_Axpy_Ax_opc_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex * buff_tau,
scomplex * buff_beta,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_u,
int  inc_u,
scomplex * buff_a,
int  inc_a,
scomplex * buff_y,
int  inc_y,
scomplex * buff_w,
int  inc_w 
)
331{
338 int i;
339
340 bl1_csetv( m_A,
341 buff_0,
342 buff_w, inc_w );
343
345
346 for ( i = 0; i < n_A; ++i )
347 {
348 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
349 scomplex* psi1 = buff_y + (i )*inc_y;
350 scomplex* alpha1 = buff_a + (i )*inc_a;
351 scomplex* u = buff_u;
352 scomplex* w = buff_w;
353
354 /*------------------------------------------------------------*/
355
357 m_A,
358 buff_1,
359 a1, rs_A,
360 u, inc_u,
361 buff_beta,
362 psi1 );
363
366
368
370 m_A,
372 a1, rs_A,
373 w, inc_w );
374/*
375 F77_caxpy( &m_A,
376 &conj_alpha1,
377 a1, &rs_A,
378 w, &inc_w );
379*/
380
381 /*------------------------------------------------------------*/
382
383 }
384
385 return FLA_SUCCESS;
386}
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
double *restrict psi1
Definition bl1_axmyv2.c:141
int i
Definition bl1_axmyv2.c:145
double *restrict alpha1
Definition bl1_axpyv2bdotaxpy.c:198
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition bl1_dots.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References alpha1, bl1_caxpyv(), bl1_cdots(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Ahx_Axpy_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double * buff_tau,
double * buff_beta,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_u,
int  inc_u,
double * buff_a,
int  inc_a,
double * buff_y,
int  inc_y,
double * buff_w,
int  inc_w 
)
216{
217 double zero = bl1_d0();
218 double minus_one = bl1_dm1();
219 double* restrict u = buff_u;
220 double* restrict w = buff_w;
221 double* restrict beta = buff_beta;
222 double* restrict a1;
223 double* restrict a2;
224 double* restrict psi1;
225 double* restrict psi2;
226 double* restrict alpha1;
227 double* restrict alpha2;
228
229 double minus_inv_tau;
230 int i;
231
232 int n_run = n_A / 2;
233 int n_left = n_A % 2;
234 int stepcs_A = 2*cs_A;
235 int stepinc_y = 2*inc_y;
236 int stepinc_a = 2*inc_a;
237
238
239 bl1_dsetv( m_A,
240 &zero,
241 buff_w, inc_w );
242
244
245 a1 = buff_A;
246 a2 = buff_A + cs_A;
247 psi1 = buff_y;
248 psi2 = buff_y + inc_y;
249 alpha1 = buff_a;
250 alpha2 = buff_a + inc_a;
251
252 for ( i = 0; i < n_run; ++i )
253 {
254/*
255 Effective computation:
256 y = beta * y + A' * u;
257 a = a - conj(y) / tau;
258 w = A * conj(a);
259*/
260 /*------------------------------------------------------------*/
261
263 m_A,
264 a1, rs_A,
265 a2, rs_A,
266 u, inc_u,
267 beta,
268 psi1,
269 psi2 );
270
273
275 alpha1,
276 alpha2,
277 a1, rs_A,
278 a2, rs_A,
279 w, inc_w );
280
281 /*------------------------------------------------------------*/
282
283 a1 += stepcs_A;
284 a2 += stepcs_A;
285 psi1 += stepinc_y;
286 psi2 += stepinc_y;
287 alpha1 += stepinc_a;
288 alpha2 += stepinc_a;
289 }
290
291 if ( n_left == 1 )
292 //for ( i = 0; i < n_left; ++i )
293 {
294 double rho1;
295
297 m_A,
298 a1, rs_A,
299 u, inc_u,
300 &rho1 );
302 bl1_dadd3( psi1, &rho1, psi1 );
303
305
307 m_A,
308 alpha1,
309 a1, rs_A,
310 w, inc_w );
311
312 //a1 += cs_A;
313 //psi1 += inc_y;
314 //alpha1 += inc_a;
315 }
316
317 return FLA_SUCCESS;
318}
int n_left
Definition bl1_axmyv2.c:149
int n_run
Definition bl1_axmyv2.c:148
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition bl1_axpyv2b.c:31
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
double rho1
Definition bl1_dotsv2.c:149
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition bl1_dotsv2.c:35
double *restrict alpha2
Definition bl1_dotv2axpyv2b.c:188
double bl1_dm1(void)
Definition bl1_constants.c:182
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39
double bl1_d0(void)
Definition bl1_constants.c:118

References alpha1, alpha2, bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, and rho1.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Ahx_Axpy_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float * buff_tau,
float * buff_beta,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_u,
int  inc_u,
float * buff_a,
int  inc_a,
float * buff_y,
int  inc_y,
float * buff_w,
int  inc_w 
)
152{
153 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
154 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
156 float minus_inv_tau;
157 int i;
158
159 bl1_ssetv( m_A,
160 buff_0,
161 buff_w, inc_w );
162
164
165 for ( i = 0; i < n_A; ++i )
166 {
167 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
168 float* psi1 = buff_y + (i )*inc_y;
169 float* alpha1 = buff_a + (i )*inc_a;
170 float* u = buff_u;
171 float* w = buff_w;
172
173 /*------------------------------------------------------------*/
174
176 m_A,
177 buff_1,
178 a1, rs_A,
179 u, inc_u,
180 buff_beta,
181 psi1 );
182
183 // bl1_dmult4( &minus_inv_tau, conj_psi1, alpha1, alpha1 );
185
187 m_A,
188 alpha1,
189 a1, rs_A,
190 w, inc_w );
191/*
192 F77_saxpy( &m_A,
193 alpha1,
194 a1, &rs_A,
195 w, &inc_w );
196*/
197
198 /*------------------------------------------------------------*/
199
200 }
201
202 return FLA_SUCCESS;
203}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition bl1_dots.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References alpha1, bl1_saxpyv(), bl1_sdots(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, i, and psi1.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Ahx_Axpy_Ax_opt_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1 ( FLA_Obj  A,
FLA_Obj  u,
FLA_Obj  tau,
FLA_Obj  a,
FLA_Obj  beta,
FLA_Obj  y,
FLA_Obj  w 
)
14{
15/*
16 Effective computation:
17 y = beta * y + A' * u;
18 a = a - conj(y) / tau;
19 w = A * conj(a);
20*/
21 FLA_Datatype datatype;
22 int m_A, n_A;
23 int rs_A, cs_A;
24 int inc_u, inc_a, inc_y, inc_w;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
30
33
35
37
39
41
42
43 switch ( datatype )
44 {
45 case FLA_FLOAT:
46 {
47 float* buff_A = FLA_FLOAT_PTR( A );
48 float* buff_u = FLA_FLOAT_PTR( u );
49 float* buff_a = FLA_FLOAT_PTR( a );
50 float* buff_y = FLA_FLOAT_PTR( y );
51 float* buff_w = FLA_FLOAT_PTR( w );
52 float* buff_tau = FLA_FLOAT_PTR( tau );
53 float* buff_beta = FLA_FLOAT_PTR( beta );
54
56 n_A,
63 buff_w, inc_w );
64
65 break;
66 }
67
68 case FLA_DOUBLE:
69 {
70 double* buff_A = FLA_DOUBLE_PTR( A );
71 double* buff_u = FLA_DOUBLE_PTR( u );
72 double* buff_a = FLA_DOUBLE_PTR( a );
73 double* buff_y = FLA_DOUBLE_PTR( y );
74 double* buff_w = FLA_DOUBLE_PTR( w );
75 double* buff_tau = FLA_DOUBLE_PTR( tau );
76 double* buff_beta = FLA_DOUBLE_PTR( beta );
77
79 n_A,
86 buff_w, inc_w );
87
88 break;
89 }
90
91 case FLA_COMPLEX:
92 {
100
102 n_A,
103 buff_tau,
104 buff_beta,
105 buff_A, rs_A, cs_A,
106 buff_u, inc_u,
107 buff_a, inc_a,
108 buff_y, inc_y,
109 buff_w, inc_w );
110
111 break;
112 }
113
115 {
123
125 n_A,
126 buff_tau,
127 buff_beta,
128 buff_A, rs_A, cs_A,
129 buff_u, inc_u,
130 buff_a, inc_a,
131 buff_y, inc_y,
132 buff_w, inc_w );
133
134 break;
135 }
136 }
137
138 return FLA_SUCCESS;
139}
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Ahx_Axpy_Ax_opz_var1()

FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex * buff_tau,
dcomplex * buff_beta,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_u,
int  inc_u,
dcomplex * buff_a,
int  inc_a,
dcomplex * buff_y,
int  inc_y,
dcomplex * buff_w,
int  inc_w 
)
399{
400 dcomplex zero = bl1_z0();
411
417 int i;
418 int n_run = n_A / 2;
419 int n_left = n_A % 2;
420 int twocs_A = 2*cs_A;
421 int twoinc_y = 2*inc_y;
422 int twoinc_a = 2*inc_a;
423
424
425 bl1_zsetv( m_A,
426 &zero,
427 buff_w, inc_w );
428
430
431 a1 = buff_A;
432 a2 = buff_A + cs_A;
433 psi1 = buff_y;
434 psi2 = buff_y + inc_y;
435 alpha1 = buff_a;
436 alpha2 = buff_a + inc_a;
437
438 for ( i = 0; i < n_run; ++i )
439 {
440/*
441 Effective computation:
442 y = beta * y + A' * u;
443 a = a - conj(y) / tau;
444 w = A * conj(a);
445*/
446 /*------------------------------------------------------------*/
447
449 m_A,
450 a1, rs_A,
451 a2, rs_A,
452 u, inc_u,
453 beta,
454 psi1,
455 psi2 );
456
463
467 a1, rs_A,
468 a2, rs_A,
469 w, inc_w );
470
471 /*------------------------------------------------------------*/
472
473 a1 += twocs_A;
474 a2 += twocs_A;
475 psi1 += twoinc_y;
476 psi2 += twoinc_y;
477 alpha1 += twoinc_a;
478 alpha2 += twoinc_a;
479 }
480
481 if ( n_left == 1 )
482 {
484
486 m_A,
487 a1, rs_A,
488 u, inc_u,
489 &rho1 );
491 bl1_zadd3( psi1, &rho1, psi1 );
492
496
498 m_A,
500 a1, rs_A,
501 w, inc_w );
502 }
503
504 return FLA_SUCCESS;
505}
int twoinc_y
Definition bl1_axpyv2b.c:154
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition bl1_axpyv2b.c:210
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition bl1_dotsv2.c:248
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66
dcomplex bl1_zm1(void)
Definition bl1_constants.c:197

References alpha1, alpha2, bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zscals(), bl1_zsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), and FLA_Fused_Ahx_Axpy_Ax_opt_var1().