libflame revision_anchor
Functions | Variables
bl1_dotaxmyv2.c File Reference

(r)

Functions

void bl1_sdotaxmyv2 (int n, float *alpha, float *beta, float *x, int inc_x, float *u, int inc_u, float *rho, float *y, int inc_y, float *z, int inc_z)
 
void bl1_ddotaxmyv2 (int n, double *alpha, double *beta, double *x, int inc_x, double *u, int inc_u, double *rho, double *y, int inc_y, double *z, int inc_z)
 
 if (inc_x !=1||inc_u !=1||inc_y !=1||inc_z !=1)
 
 for (i=0;i< n_run;++i)
 
 if (n_left > 0)
 
void bl1_cdotaxmyv2 (int n, scomplex *alpha, scomplex *beta, scomplex *x, int inc_x, scomplex *u, int inc_u, scomplex *rho, scomplex *y, int inc_y, scomplex *z, int inc_z)
 
void bl1_zdotaxmyv2 (int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *u, int inc_u, dcomplex *rho, dcomplex *y, int inc_y, dcomplex *z, int inc_z)
 

Variables

double *restrict upsilon1
 
double *restrict psi1
 
double *restrict zeta1
 
double alpha_c
 
double beta_c
 
double rho_c
 
int i
 
int n_pre
 
int n_run
 
int n_left
 
rho = rho_c
 

Function Documentation

◆ bl1_cdotaxmyv2()

void bl1_cdotaxmyv2 ( int  n,
scomplex alpha,
scomplex beta,
scomplex x,
int  inc_x,
scomplex u,
int  inc_u,
scomplex rho,
scomplex y,
int  inc_y,
scomplex z,
int  inc_z 
)
271{
272 bl1_abort();
273}
void bl1_abort(void)
Definition bl1_abort.c:13

References bl1_abort().

◆ bl1_ddotaxmyv2()

void bl1_ddotaxmyv2 ( int  n,
double alpha,
double beta,
double x,
int  inc_x,
double u,
int  inc_u,
double rho,
double y,
int  inc_y,
double z,
int  inc_z 
)
43{
44 double* restrict chi1;
45 double* restrict upsilon1;
46 double* restrict psi1;
47 double* restrict zeta1;
48 double rho_c;
49 int i;
50
51 int n_pre;
52 int n_run;
53 int n_left;
54
55 v2df_t a1v, b1v;
56 v2df_t rho1v;
57 v2df_t x1v, u1v, y1v, z1v;
58
59 if ( inc_x != 1 ||
60 inc_u != 1 ||
61 inc_y != 1 ||
62 inc_z != 1 ) bl1_abort();
63
64 n_pre = 0;
65 if ( ( unsigned long ) z % 16 != 0 )
66 {
67 if ( ( unsigned long ) x % 16 == 0 ||
68 ( unsigned long ) u % 16 == 0 ||
69 ( unsigned long ) y % 16 == 0 ) bl1_abort();
70
71 n_pre = 1;
72 }
73
74 n_run = ( n - n_pre ) / 2;
75 n_left = ( n - n_pre ) % 2;
76
77 chi1 = x;
78 upsilon1 = u;
79 psi1 = y;
80 zeta1 = z;
81
82 rho_c = 0.0;
83
84 if ( n_pre == 1 )
85 {
86 double alpha_c = *alpha;
87 double beta_c = *beta;
88 double chi1_c = *chi1;
89 double upsilon_c = *upsilon1;
90
91 rho_c += chi1_c * upsilon_c;
92 *psi1 -= alpha_c * chi1_c;
93 *zeta1 -= beta_c * chi1_c;
94
95 chi1 += inc_x;
96 upsilon1 += inc_u;
97 psi1 += inc_y;
98 zeta1 += inc_z;
99 }
100
101 a1v.v = _mm_loaddup_pd( ( double* )alpha );
102 b1v.v = _mm_loaddup_pd( ( double* )beta );
103
104 rho1v.v = _mm_setzero_pd();
105
106 for ( i = 0; i < n_run; ++i )
107 {
108 x1v.v = _mm_load_pd( ( double* )chi1 );
109 u1v.v = _mm_load_pd( ( double* )upsilon1 );
110 y1v.v = _mm_load_pd( ( double* )psi1 );
111 z1v.v = _mm_load_pd( ( double* )zeta1 );
112
113 rho1v.v += x1v.v * u1v.v;
114 y1v.v -= a1v.v * x1v.v;
115 z1v.v -= b1v.v * x1v.v;
116
117 _mm_store_pd( ( double* )psi1, y1v.v );
118 _mm_store_pd( ( double* )zeta1, z1v.v );
119
120 chi1 += 2;
121 upsilon1 += 2;
122 psi1 += 2;
123 zeta1 += 2;
124 }
125
126 rho_c += rho1v.d[0] + rho1v.d[1];
127
128 if ( n_left > 0 )
129 {
130 double alpha_c = *alpha;
131 double beta_c = *beta;
132
133 for( i = 0; i < n_left; ++i )
134 {
135 double chi1_c = *chi1;
136 double upsilon_c = *upsilon1;
137
138 rho_c += chi1_c * upsilon_c;
139 *psi1 -= alpha_c * chi1_c;
140 *zeta1 -= beta_c * chi1_c;
141
142 chi1 += inc_x;
143 upsilon1 += inc_u;
144 psi1 += inc_y;
145 zeta1 += inc_z;
146 }
147 }
148
149 *rho = rho_c;
150}
chi1
Definition bl1_axmyv2.c:366
double beta_c
Definition bl1_dotaxmyv2.c:158
double alpha_c
Definition bl1_dotaxmyv2.c:157
int n_left
Definition bl1_dotaxmyv2.c:164
double *restrict upsilon1
Definition bl1_dotaxmyv2.c:154
int n_pre
Definition bl1_dotaxmyv2.c:162
double rho_c
Definition bl1_dotaxmyv2.c:159
double *restrict psi1
Definition bl1_dotaxmyv2.c:155
int n_run
Definition bl1_dotaxmyv2.c:163
* rho
Definition bl1_dotaxmyv2.c:258
int i
Definition bl1_dotaxmyv2.c:160
double *restrict zeta1
Definition bl1_dotaxmyv2.c:156
Definition blis_type_defs.h:117

References alpha_c, beta_c, bl1_abort(), chi1, v2df_t::d, i, n_left, n_pre, n_run, psi1, rho, rho_c, upsilon1, v2df_t::v, and zeta1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opd_var1().

◆ bl1_sdotaxmyv2()

void bl1_sdotaxmyv2 ( int  n,
float alpha,
float beta,
float x,
int  inc_x,
float u,
int  inc_u,
float rho,
float y,
int  inc_y,
float z,
int  inc_z 
)
29{
30 bl1_abort();
31}

References bl1_abort().

◆ bl1_zdotaxmyv2()

void bl1_zdotaxmyv2 ( int  n,
dcomplex alpha,
dcomplex beta,
dcomplex x,
int  inc_x,
dcomplex u,
int  inc_u,
dcomplex rho,
dcomplex y,
int  inc_y,
dcomplex z,
int  inc_z 
)
285{
290 int i;
291
295 v2df_t x1v, x1rv;
296 v2df_t y1v;
297 v2df_t z1v;
301
302 if ( inc_x != 1 ||
303 inc_u != 1 ||
304 inc_y != 1 ||
305 inc_z != 1 ) bl1_abort();
306
307 chi1 = x;
308 upsilon1 = u;
309 psi1 = y;
310 zeta1 = z;
311
312 //rho_c.real = 0.0;
313 //rho_c.imag = 0.0;
314 rho1v.v = _mm_setzero_pd();
315
316 //alpha_c = *alpha;
317 //beta_c = *beta;
318 alpha11v.v = _mm_loaddup_pd( ( double* )&(alpha->real) );
319 alpha12v.v = _mm_loaddup_pd( ( double* )&(alpha->imag) );
320 beta11v.v = _mm_loaddup_pd( ( double* )&(beta->real) );
321 beta12v.v = _mm_loaddup_pd( ( double* )&(beta->imag) );
322
323 for ( i = 0; i < n; ++i )
324 {
325 //dcomplex chi1_c = *chi1;
326 x1v.v = _mm_load_pd( ( double* )chi1 );
327
328 //psi1->real -= alpha_c.real * chi1_c.real - alpha_c.imag * chi1_c.imag;
329 //psi1->imag -= alpha_c.real * chi1_c.imag + alpha_c.imag * chi1_c.real;
330 x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
331 acad.v = alpha11v.v * x1v.v;
332 bdbc.v = alpha12v.v * x1rv.v;
333 y1v.v = _mm_load_pd( ( double* )psi1 );
334 y1v.v = y1v.v - _mm_addsub_pd( acad.v, bdbc.v );
335 _mm_store_pd( ( double* )psi1, y1v.v );
336
337 //zeta1->real -= beta_c.real * chi1_c.real - beta_c.imag * chi1_c.imag;
338 //zeta1->imag -= beta_c.real * chi1_c.imag + beta_c.imag * chi1_c.real;
339 x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
340 acad.v = beta11v.v * x1v.v;
341 bdbc.v = beta12v.v * x1rv.v;
342 z1v.v = _mm_load_pd( ( double* )zeta1 );
343 z1v.v = z1v.v - _mm_addsub_pd( acad.v, bdbc.v );
344 _mm_store_pd( ( double* )zeta1, z1v.v );
345
346 //rho_c.real = chi1_c.real * upsilon1_c.real - -chi1_c.imag * upsilon1_c.imag;
347 //rho_c.imag = chi1_c.real * upsilon1_c.imag + -chi1_c.imag * upsilon1_c.real;
348 x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
349 u11v.v = _mm_loaddup_pd( ( double* )&(upsilon1->real) );
350 u12v.v = _mm_loaddup_pd( ( double* )&(upsilon1->imag) );
351 bcac.v = x1rv.v * u11v.v;
352 adbd.v = x1v.v * u12v.v;
353 rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v );
354
355 chi1 += 1;
356 upsilon1 += 1;
357 psi1 += 1;
358 zeta1 += 1;
359 }
360
361 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) );
362
363 rho1v.d[1] = -rho1v.d[1];
364
365 _mm_store_pd( ( double* )rho, rho1v.v );
366}
Definition blis_type_defs.h:138

References bl1_abort(), chi1, v2df_t::d, i, dcomplex::imag, psi1, dcomplex::real, rho, upsilon1, v2df_t::v, and zeta1.

◆ for()

for ( i = 0; i < n_run; ++i )
210 {
211 double chi1_c = *chi1;
212 double chi2_c = *(chi1 + 1);
213 double upsilon1_c = *upsilon1;
214 double upsilon2_c = *(upsilon1 + 1);
215 double psi1_c = *psi1;
216 double psi2_c = *(psi1 + 1);
217 double zeta1_c = *zeta1;
218 double zeta2_c = *(zeta1 + 1);
219
220 rho_c += chi1_c * upsilon1_c;
221 rho_c += chi2_c * upsilon2_c;
222
223 psi1_c -= alpha_c * chi1_c;
224 psi2_c -= alpha_c * chi2_c;
225
226 zeta1_c -= beta_c * chi1_c;
227 zeta2_c -= beta_c * chi2_c;
228
229 *psi1 = psi1_c;
230 *(psi1 + 1) = psi2_c;
231 *zeta1 = zeta1_c;
232 *(zeta1 + 1) = zeta2_c;
233
234 chi1 += 2*inc_x;
235 upsilon1 += 2*inc_u;
236 psi1 += 2*inc_y;
237 zeta1 += 2*inc_z;
238 }

References alpha_c, beta_c, chi1, psi1, rho_c, upsilon1, and zeta1.

◆ if() [1/2]

if ( inc_x != 1 || inc_u != 1 || inc_y != 1 || inc_z != 1)
195 {
196 double chi1_c = *chi1;
197 double upsilon_c = *upsilon1;
198
199 rho_c += chi1_c * upsilon_c;
200 *psi1 -= alpha_c * chi1_c;
201 *zeta1 -= beta_c * chi1_c;
202
203 chi1 += inc_x;
204 upsilon1 += inc_u;
205 psi1 += inc_y;
206 zeta1 += inc_z;
207 }

◆ if() [2/2]

if ( n_left > 0 )
241 {
242 for( i = 0; i < n_left; ++i )
243 {
244 double chi1_c = *chi1;
245 double upsilon_c = *upsilon1;
246
247 rho_c += chi1_c * upsilon_c;
248 *psi1 -= alpha_c * chi1_c;
249 *zeta1 -= beta_c * chi1_c;
250
251 chi1 += inc_x;
252 upsilon1 += inc_u;
253 psi1 += inc_y;
254 zeta1 += inc_z;
255 }
256 }

References alpha_c, beta_c, chi1, i, n_left, psi1, rho_c, upsilon1, and zeta1.

Variable Documentation

◆ alpha_c

dcomplex alpha_c

Referenced by bl1_ddotaxmyv2(), for(), and if().

◆ beta_c

dcomplex beta_c

Referenced by bl1_ddotaxmyv2(), for(), and if().

◆ i

int i

Referenced by bl1_ddotaxmyv2(), bl1_zdotaxmyv2(), and if().

◆ n_left

int n_left

Referenced by bl1_ddotaxmyv2(), and if().

◆ n_pre

int n_pre

Referenced by bl1_ddotaxmyv2().

◆ n_run

int n_run

Referenced by bl1_ddotaxmyv2().

◆ psi1

◆ rho

* rho = rho_c

Referenced by bl1_ddotaxmyv2(), and bl1_zdotaxmyv2().

◆ rho_c

dcomplex rho_c

Referenced by bl1_ddotaxmyv2(), for(), and if().

◆ upsilon1

dcomplex* restrict upsilon1
Initial value:
{
double* restrict chi1

Referenced by bl1_ddotaxmyv2(), bl1_zdotaxmyv2(), for(), and if().

◆ zeta1