libflame revision_anchor
Functions | Variables
bl1_dotaxpy.c File Reference

(r)

Functions

void bl1_sdotaxpy (int n, float *a, int inc_a, float *x, int inc_x, float *kappa, float *rho, float *w, int inc_w)
 
void bl1_ddotaxpy (int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
 
 if (inc_a !=1||inc_x !=1||inc_w !=1)
 
 for (i=0;i< n_run;++i)
 
 if (n_left > 0)
 
void bl1_cdotaxpy (int n, scomplex *a, int inc_a, scomplex *x, int inc_x, scomplex *kappa, scomplex *rho, scomplex *w, int inc_w)
 
void bl1_zdotaxpy (int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
 

Variables

double *restrict chi1
 
double *restrict omega1 = w
 
double kappa_c = *kappa
 
double rho_c
 
int i
 
int n_pre
 
int n_run
 
int n_left
 
rho = rho_c
 
 alpha1 = a
 
rho_c real = 0.0
 
rho_c imag = 0.0
 

Function Documentation

◆ bl1_cdotaxpy()

void bl1_cdotaxpy ( int  n,
scomplex *a,
int  inc_a,
scomplex *x,
int  inc_x,
scomplex *kappa,
scomplex *rho,
scomplex *w,
int  inc_w 
)
253{
254 bl1_abort();
255}
void bl1_abort(void)
Definition bl1_abort.c:13

References bl1_abort().

◆ bl1_ddotaxpy()

void bl1_ddotaxpy ( int  n,
double *a,
int  inc_a,
double *x,
int  inc_x,
double *kappa,
double *rho,
double *w,
int  inc_w 
)
38{
39 double* restrict alpha1;
40 double* restrict chi1;
41 double* restrict omega1;
42 double rho_c;
43 int i;
44
45 int n_pre;
46 int n_run;
47 int n_left;
48
50 v2df_t a1v, x1v, w1v;
51 v2df_t a2v, x2v, w2v;
52
53 if ( inc_a != 1 ||
54 inc_x != 1 ||
55 inc_w != 1 ) bl1_abort();
56
57 n_pre = 0;
58 if ( ( unsigned long ) a % 16 != 0 )
59 {
60 if ( ( unsigned long ) x % 16 == 0 ||
61 ( unsigned long ) w % 16 == 0 ) bl1_abort();
62
63 n_pre = 1;
64 }
65
66 n_run = ( n - n_pre ) / 4;
67 n_left = ( n - n_pre ) % 4;
68
69 alpha1 = a;
70 chi1 = x;
71 omega1 = w;
72
73 rho_c = 0.0;
74
75 if ( n_pre == 1 )
76 {
77 double kappa_c = *kappa;
78 double alpha1_c = *alpha1;
79 double chi1_c = *chi1;
80 double omega1_c = *omega1;
81
84
86
87 alpha1 += inc_a;
88 chi1 += inc_x;
89 omega1 += inc_w;
90 }
91
93
94 k1v.v = _mm_loaddup_pd( ( double* )kappa );
95
96 for ( i = 0; i < n_run; ++i )
97 {
98 a1v.v = _mm_load_pd( ( double* )alpha1 );
99 x1v.v = _mm_load_pd( ( double* )chi1 );
100 w1v.v = _mm_load_pd( ( double* )omega1 );
101
102 a2v.v = _mm_load_pd( ( double* )(alpha1 + 2) );
103 x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );
104 w2v.v = _mm_load_pd( ( double* )(omega1 + 2) );
105
106 rho1v.v += a1v.v * x1v.v;
107 w1v.v += k1v.v * a1v.v;
108
109 _mm_store_pd( ( double* )omega1, w1v.v );
110
111 rho1v.v += a2v.v * x2v.v;
112 w2v.v += k1v.v * a2v.v;
113
114 _mm_store_pd( ( double* )(omega1 + 2), w2v.v );
115
116 alpha1 += 4;
117 chi1 += 4;
118 omega1 += 4;
119 }
120
121 if ( n_left > 0 )
122 {
123 for ( i = 0; i < n_left; ++i )
124 {
125 double kappa_c = *kappa;
126 double alpha1_c = *alpha1;
127 double chi1_c = *chi1;
128 double omega1_c = *omega1;
129
130 rho_c += alpha1_c * chi1_c;
132
133 *omega1 = omega1_c;
134
135 alpha1 += inc_a;
136 chi1 += inc_x;
137 omega1 += inc_w;
138 }
139 }
140
141 rho_c += rho1v.d[0] + rho1v.d[1];
142
143 *rho = rho_c;
144}
double alpha1_c
Definition bl1_axpyv2b.c:144
double *restrict omega1
Definition bl1_dotaxpy.c:149
double *restrict chi1
Definition bl1_dotaxpy.c:148
alpha1
Definition bl1_dotaxpy.c:338
int n_left
Definition bl1_dotaxpy.c:156
int n_pre
Definition bl1_dotaxpy.c:154
double rho_c
Definition bl1_dotaxpy.c:151
double kappa_c
Definition bl1_dotaxpy.c:150
int n_run
Definition bl1_dotaxpy.c:155
* rho
Definition bl1_dotaxpy.c:242
int i
Definition bl1_dotaxpy.c:152
Definition blis_type_defs.h:117

References alpha1, alpha1_c, bl1_abort(), chi1, v2df_t::d, i, kappa_c, n_left, n_pre, n_run, omega1, rho, rho_c, and v2df_t::v.

Referenced by FLA_Fused_Ahx_Ax_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

◆ bl1_sdotaxpy()

void bl1_sdotaxpy ( int  n,
float *a,
int  inc_a,
float *x,
int  inc_x,
float *kappa,
float *rho,
float *w,
int  inc_w 
)
26{
27 bl1_abort();
28}

References bl1_abort().

◆ bl1_zdotaxpy()

void bl1_zdotaxpy ( int  n,
dcomplex *a,
int  inc_a,
dcomplex *x,
int  inc_x,
dcomplex *kappa,
dcomplex *rho,
dcomplex *w,
int  inc_w 
)
265{
269 int i;
270
274 v2df_t x1v, x1rv;
275 v2df_t w1v;
278
279 alpha1 = a;
280 chi1 = x;
281 omega1 = w;
282
283 if ( inc_a != 1 ||
284 inc_x != 1 ||
285 inc_w != 1 ) bl1_abort();
286
287 kappa1v.v = _mm_load_pd( ( double* )kappa );
289
290 rho1v.v = _mm_setzero_pd();
291
292 for ( i = 0; i < n; ++i )
293 {
294 //alpha_c = *alpha1;
295 a11v.v = _mm_loaddup_pd( ( double* )&(alpha1->real) );
296 a12v.v = _mm_loaddup_pd( ( double* )&(alpha1->imag) );
297
298 //rho_c.real += alpha1_c.real * chi1_c.real - -alpha1_c.imag * chi1_c.imag;
299 //rho_c.imag += alpha1_c.real * chi1_c.imag + -alpha1_c.imag * chi1_c.real;
300 x1v.v = _mm_load_pd( ( double* )chi1 );
301 x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
302 adac.v = a11v.v * x1rv.v;
303 bcbd.v = a12v.v * x1v.v;
304 rho1v.v = rho1v.v + _mm_addsub_pd( adac.v, bcbd.v );
305
306 //omega_c = *omega1;
307 w1v.v = _mm_load_pd( ( double* )omega1 );
308
309 //omega1_c.real += kappa_c.real * alpha1_c.real - kappa_c.imag * alpha1_c.imag;
310 //omega1_c.imag += kappa_c.real * alpha1_c.imag + kappa_c.imag * alpha1_c.real;
311 acbc.v = kappa1v.v * a11v.v;
312 bdad.v = kappa1rv.v * a12v.v;
313 w1v.v += _mm_addsub_pd( acbc.v, bdad.v );
314
315 //*omega1 = omega1_c;
316 _mm_store_pd( ( double* )omega1, w1v.v );
317
318 alpha1 += 1;
319 chi1 += 1;
320 omega1 += 1;
321 }
322
323 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) );
324
325 //rho->real = rho_c.real;
326 //rho->imag = rho_c.imag;
327 _mm_store_pd( ( double* )rho, rho1v.v );
328}
Definition blis_type_defs.h:138

References alpha1, bl1_abort(), chi1, i, omega1, rho, and v2df_t::v.

Referenced by FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), and FLA_Fused_UZhu_ZUhu_opz_var1().

◆ for()

for ( i = 0; i < n_run; ++i )
199 {
200 double alpha1_c = *alpha1;
201 double alpha2_c = *(alpha1 + 1);
202 double chi1_c = *chi1;
203 double chi2_c = *(chi1 + 1);
204 double omega1_c = *omega1;
205 double omega2_c = *(omega1 + 1);
206
207 // rho += conj(alpha1) * chi1;
208 rho_c += alpha1_c * chi1_c;
209 rho_c += alpha2_c * chi2_c;
210
211 // omega1 += kappa * alpha1;
214
215 *omega1 = omega1_c;
216 *(omega1 + 1) = omega2_c;
217
218 alpha1 += 2*inc_a;
219 chi1 += 2*inc_x;
220 omega1 += 2*inc_w;
221 }
double alpha2_c
Definition bl1_axpyv2b.c:145

References alpha1, alpha1_c, alpha2_c, chi1, kappa_c, omega1, and rho_c.

◆ if() [1/2]

if ( inc_a != 1 || inc_x != 1 || inc_w != 1)
183 {
184 double alpha1_c = *alpha1;
185 double chi1_c = *chi1;
186 double omega1_c = *omega1;
187
188 rho_c += alpha1_c * chi1_c;
190
191 *omega1 = omega1_c;
192
193 alpha1 += inc_a;
194 chi1 += inc_x;
195 omega1 += inc_w;
196 }

◆ if() [2/2]

if ( n_left > 0 )
224 {
225 for ( i = 0; i < n_left; ++i )
226 {
227 double alpha1_c = *alpha1;
228 double chi1_c = *chi1;
229 double omega1_c = *omega1;
230
231 rho_c += alpha1_c * chi1_c;
233
234 *omega1 = omega1_c;
235
236 alpha1 += inc_a;
237 chi1 += inc_x;
238 omega1 += inc_w;
239 }
240 }

References alpha1, alpha1_c, chi1, i, kappa_c, n_left, omega1, and rho_c.

Variable Documentation

◆ alpha1

alpha1 = a

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), for(), and if().

◆ chi1

chi1
Initial value:
{
double* restrict alpha1

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), for(), and if().

◆ i

int i

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), and if().

◆ imag

rho imag = 0.0

◆ kappa_c

kappa_c = *kappa

Referenced by bl1_ddotaxpy(), for(), and if().

◆ n_left

int n_left

Referenced by bl1_ddotaxpy(), and if().

◆ n_pre

int n_pre

Referenced by bl1_ddotaxpy().

◆ n_run

int n_run

Referenced by bl1_ddotaxpy().

◆ omega1

omega1 = w

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), for(), and if().

◆ real

rho real = 0.0

◆ rho

* rho = rho_c

Referenced by bl1_ddotaxpy(), and bl1_zdotaxpy().

◆ rho_c

dcomplex rho_c

Referenced by bl1_ddotaxpy(), for(), and if().