libflame revision_anchor
Functions
FLA_Fused_Ahx_Ax_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Ahx_Ax_opt_var1 (FLA_Obj A, FLA_Obj x, FLA_Obj v, FLA_Obj w)
 
FLA_Error FLA_Fused_Ahx_Ax_ops_var1 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opd_var1 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Ahx_Ax_opc_var1()

FLA_Error FLA_Fused_Ahx_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_x,
int  inc_x,
scomplex * buff_v,
int  inc_v,
scomplex * buff_w,
int  inc_w 
)
262{
263 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
264 int i;
265
266 bl1_csetv( m_A,
267 buff_0,
268 buff_w, inc_w );
269
270 for ( i = 0; i < n_A; ++i )
271 {
272 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
273 scomplex* nu1 = buff_v + (i )*inc_v;
274 scomplex* x = buff_x;
275 scomplex* chi1 = buff_x + (i )*inc_x;
276 scomplex* w = buff_w;
277
278 /*------------------------------------------------------------*/
279
280 bl1_cdot( BLIS1_CONJUGATE,
281 m_A,
282 a1, rs_A,
283 x, inc_x,
284 nu1 );
285
286 bl1_caxpyv( BLIS1_NO_CONJUGATE,
287 m_A,
288 chi1,
289 a1, rs_A,
290 w, inc_w );
291/*
292 F77_caxpy( &m_A,
293 chi1,
294 a1, &rs_A,
295 w, &inc_w );
296*/
297
298 /*------------------------------------------------------------*/
299
300 }
301
302 return FLA_SUCCESS;
303}
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
int i
Definition bl1_axmyv2.c:145
chi1
Definition bl1_axmyv2.c:366
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), and FLA_Hess_UT_step_ofc_var4().

◆ FLA_Fused_Ahx_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Ax_opd_var1 ( int  m_A,
int  n_A,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_x,
int  inc_x,
double * buff_v,
int  inc_v,
double * buff_w,
int  inc_w 
)
179{
180 double zero = bl1_d0();
181 int i;
182
183 double* restrict w = buff_w;
184 double* restrict x = buff_x;
185
186 double* restrict a1;
187 double* restrict a2;
188 double* restrict nu1;
189 double* restrict nu2;
190 double* restrict chi1;
191 double* restrict chi2;
192
193 int n_run = n_A / 2;
194 int n_left = n_A % 2;
195 int step_a1 = 2*cs_A;
196 int step_nu1 = 2*inc_v;
197 int step_chi1 = 2*inc_x;
198
199 bl1_dsetv( m_A,
200 &zero,
201 buff_w, inc_w );
202
203 a1 = buff_A;
204 a2 = buff_A + cs_A;
205 nu1 = buff_v;
206 nu2 = buff_v + inc_v;
207 chi1 = buff_x;
208 chi2 = buff_x + inc_x;
209
210 for ( i = 0; i < n_run; ++i )
211 {
212 /*------------------------------------------------------------*/
213
214 bl1_ddotv2axpyv2b( m_A,
215 a1, rs_A,
216 a2, rs_A,
217 x, inc_x,
218 chi1,
219 chi2,
220 nu1,
221 nu2,
222 w, inc_w );
223
224 /*------------------------------------------------------------*/
225
226 a1 += step_a1;
227 a2 += step_a1;
228 nu1 += step_nu1;
229 nu2 += step_nu1;
230 chi1 += step_chi1;
231 chi2 += step_chi1;
232 }
233
234 if ( n_left > 0 )
235 {
236 for ( i = 0; i < n_left; ++i )
237 {
238 bl1_ddotaxpy( m_A,
239 a1, rs_A,
240 x, inc_x,
241 chi1,
242 nu1,
243 w, inc_w );
244
245 a1 += rs_A;
246 nu1 += inc_v;
247 chi1 += inc_x;
248 }
249 }
250
251 return FLA_SUCCESS;
252}
int n_left
Definition bl1_axmyv2.c:149
int n_run
Definition bl1_axmyv2.c:148
double *restrict chi2
Definition bl1_axpyv2b.c:142
void bl1_ddotaxpy(int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition bl1_dotaxpy.c:31
void bl1_ddotv2axpyv2b(int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
Definition bl1_dotv2axpyv2b.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39
double bl1_d0(void)
Definition bl1_constants.c:118

References bl1_d0(), bl1_ddotaxpy(), bl1_ddotv2axpyv2b(), bl1_dsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), and FLA_Hess_UT_step_ofd_var4().

◆ FLA_Fused_Ahx_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Ax_ops_var1 ( int  m_A,
int  n_A,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_x,
int  inc_x,
float * buff_v,
int  inc_v,
float * buff_w,
int  inc_w 
)
122{
123 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
124 int i;
125
126 bl1_ssetv( m_A,
127 buff_0,
128 buff_w, inc_w );
129
130 for ( i = 0; i < n_A; ++i )
131 {
132 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
133 float* nu1 = buff_v + (i )*inc_v;
134 float* x = buff_x;
135 float* chi1 = buff_x + (i )*inc_x;
136 float* w = buff_w;
137
138 /*------------------------------------------------------------*/
139
140 bl1_sdot( BLIS1_CONJUGATE,
141 m_A,
142 a1, rs_A,
143 x, inc_x,
144 nu1 );
145/*
146 *nu1 = F77_sdot( &m_A,
147 a1, &rs_A,
148 x, &inc_x );
149*/
150
151 bl1_saxpyv( BLIS1_NO_CONJUGATE,
152 m_A,
153 chi1,
154 a1, rs_A,
155 w, inc_w );
156/*
157 F77_saxpy( &m_A,
158 chi1,
159 a1, &rs_A,
160 w, &inc_w );
161*/
162
163 /*------------------------------------------------------------*/
164
165 }
166
167
168 return FLA_SUCCESS;
169}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), and FLA_Hess_UT_step_ofs_var4().

◆ FLA_Fused_Ahx_Ax_opt_var1()

FLA_Error FLA_Fused_Ahx_Ax_opt_var1 ( FLA_Obj  A,
FLA_Obj  x,
FLA_Obj  v,
FLA_Obj  w 
)
14{
15/*
16 Effective computation:
17 v = A' * x;
18 w = A * x;
19*/
20 FLA_Datatype datatype;
21 int m_A, n_A;
22 int rs_A, cs_A;
23 int inc_x, inc_v, inc_w;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 n_A = FLA_Obj_width( A );
29
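30 rs_A = FLA_Obj_row_stride( A );
31 cs_A = FLA_Obj_col_stride( A );
32
33 inc_x = FLA_Obj_vector_inc( x );
34
35 inc_v = FLA_Obj_vector_inc( v );
36
37 inc_w = FLA_Obj_vector_inc( w );
38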
39
40 switch ( datatype )
41 {
42 case FLA_FLOAT:
43 {
44 float* buff_A = FLA_FLOAT_PTR( A );
45 float* buff_x = FLA_FLOAT_PTR( x );
46 float* buff_v = FLA_FLOAT_PTR( v );
47 float* buff_w = FLA_FLOAT_PTR( w );
48
49 FLA_Fused_Ahx_Ax_ops_var1( m_A,
50 n_A,
51 buff_A, rs_A, cs_A,
52 buff_x, inc_x,
53 buff_v, inc_v,
54 buff_w, inc_w );
55
56 break;
57 }
58
59 case FLA_DOUBLE:
60 {
61 double* buff_A = FLA_DOUBLE_PTR( A );
62 double* buff_x = FLA_DOUBLE_PTR( x );
63 double* buff_v = FLA_DOUBLE_PTR( v );
64 double* buff_w = FLA_DOUBLE_PTR( w );
65
66 FLA_Fused_Ahx_Ax_opd_var1( m_A,
67 n_A,
68 buff_A, rs_A, cs_A,
69 buff_x, inc_x,
70 buff_v, inc_v,
71 buff_w, inc_w );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
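78 scomplex* buff_A = FLA_COMPLEX_PTR( A );
79 scomplex* buff_x = FLA_COMPLEX_PTR( x );
80 scomplex* buff_v = FLA_COMPLEX_PTR( v );
81 scomplex* buff_w = FLA_COMPLEX_PTR( w );
82
83 FLA_Fused_Ahx_Ax_opc_var1( m_A,
84 n_A,
85 buff_A, rs_A, cs_A,
86 buff_x, inc_x,
87 buff_v, inc_v,
88 buff_w, inc_w );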
89
90 break;
91 }
92
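93 case FLA_DOUBLE_COMPLEX:
94 {
95 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
96 dcomplex* buff_x = FLA_DOUBLE_COMPLEX_PTR( x );
97 dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
98 dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
99
100 FLA_Fused_Ahx_Ax_opz_var1( m_A,
101 n_A,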
102 buff_A, rs_A, cs_A,
103 buff_x, inc_x,
104 buff_v, inc_v,
105 buff_w, inc_w );
106
107 break;
108 }
109 }
110
111 return FLA_SUCCESS;
112}
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:116
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Ahx_Ax_opz_var1()

FLA_Error FLA_Fused_Ahx_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_x,
int  inc_x,
dcomplex * buff_v,
int  inc_v,
dcomplex * buff_w,
int  inc_w 
)
313{
314 dcomplex zero = bl1_z0();
315 int i;
316
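317 dcomplex* restrict w = buff_w;
318 dcomplex* restrict x = buff_x;
319
320 dcomplex* restrict a1;
321 dcomplex* restrict a2;
322 dcomplex* restrict nu1;
323 dcomplex* restrict nu2;
324 dcomplex* restrict chi1;
325 dcomplex* restrict chi2;
326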
327 int n_run = n_A / 2;
328 int n_left = n_A % 2;
329 int step_a1 = 2*cs_A;
330 int step_nu1 = 2*inc_v;
331 int step_chi1 = 2*inc_x;
332
333 bl1_zsetv( m_A,
334 &zero,
335 buff_w, inc_w );
336
337 a1 = buff_A;
338 a2 = buff_A + cs_A;
339 nu1 = buff_v;
340 nu2 = buff_v + inc_v;
341 chi1 = buff_x;
342 chi2 = buff_x + inc_x;
343
344 for ( i = 0; i < n_run; ++i )
345 {
346 /*------------------------------------------------------------*/
347
348/*
349 bl1_zdotaxpy( m_A,
350 a1, rs_A,
351 x, inc_x,
352 chi1,
353 nu1,
354 w, inc_w );
355*/
356
357 bl1_zdotv2axpyv2b( m_A,
358 a1, rs_A,
359 a2, rs_A,
360 x, inc_x,
361 chi1,
362 chi2,
363 nu1,
364 nu2,
365 w, inc_w );
366
367 /*------------------------------------------------------------*/
368
369 a1 += step_a1;
370 a2 += step_a1;
371 nu1 += step_nu1;
372 nu2 += step_nu1;
373 chi1 += step_chi1;
374 chi2 += step_chi1;
375 }
376
377 if ( n_left > 0 )
378 {
379 for ( i = 0; i < n_left; ++i )
380 {
381 bl1_zdotaxpy( m_A,
382 a1, rs_A,
383 x, inc_x,
384 chi1,
385 nu1,
386 w, inc_w );
387
388 a1 += rs_A;
389 nu1 += inc_v;
390 chi1 += inc_x;
391 }
392 }
393
394 return FLA_SUCCESS;
395}
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition bl1_dotaxpy.c:258
void bl1_zdotv2axpyv2b(int n, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *x, int inc_x, dcomplex *kappa1, dcomplex *kappa2, dcomplex *rho1, dcomplex *rho2, dcomplex *w, int inc_w)
Definition bl1_dotv2axpyv2b.c:331
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66

References bl1_z0(), bl1_zdotaxpy(), bl1_zdotv2axpyv2b(), bl1_zsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), and FLA_Hess_UT_step_ofz_var4().