libflame revision_anchor
Functions | Variables
bl1_axpyv2b.c File Reference

(r)

Functions

void bl1_saxpyv2b (int n, float *alpha1, float *alpha2, float *x1, int inc_x1, float *x2, int inc_x2, float *y, int inc_y)
 
void bl1_daxpyv2b (int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
 
 for (i=0;i< n_run;++i)
 
 if (n_left==1)
 
void bl1_caxpyv2b (int n, scomplex *alpha1, scomplex *alpha2, scomplex *x1, int inc_x1, scomplex *x2, int inc_x2, scomplex *y, int inc_y)
 
void bl1_zaxpyv2b (int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
 

Variables

double *restrict chi2
 
double *restrict psi1 = y
 
double alpha1_c = *alpha1
 
double alpha2_c = *alpha2
 
double temp1
 
double temp2
 
int i
 
int n_run = n / 2
 
int n_left = n % 2
 
int twoinc_x1 = 2*inc_x1
 
int twoinc_x2 = 2*inc_x2
 
int twoinc_y = 2*inc_y
 
 chi1 = x1
 
dcomplex temp
 

Function Documentation

◆ bl1_caxpyv2b()

void bl1_caxpyv2b ( int  n,
scomplex * alpha1,
scomplex * alpha2,
scomplex * x1,
int  inc_x1,
scomplex * x2,
int  inc_x2,
scomplex * y,
int  inc_y 
)
205{
206 bl1_abort();
207}
void bl1_abort(void)
Definition bl1_abort.c:13

References bl1_abort().

◆ bl1_daxpyv2b()

void bl1_daxpyv2b ( int  n,
double * alpha1,
double * alpha2,
double * x1,
int  inc_x1,
double * x2,
int  inc_x2,
double * y,
int  inc_y 
)
38{
39 double* restrict chi1;
40 double* restrict chi2;
41 double* restrict psi1;
42 int i;
43
44 int n_pre;
45 int n_run;
46 int n_left;
47
48 v2df_t a1v, a2v;
51 v2df_t y1v;
52 v2df_t y2v;
53
54 if ( inc_x1 != 1 ||
55 inc_x2 != 1 ||
56 inc_y != 1 ) bl1_abort();
57
58 n_pre = 0;
59 if ( ( unsigned long ) y % 16 != 0 )
60 {
61 if ( ( unsigned long ) x1 % 16 == 0 ||
62 ( unsigned long ) x2 % 16 == 0 ) bl1_abort();
63
64 n_pre = 1;
65 }
66
67 n_run = ( n - n_pre ) / 4;
68 n_left = ( n - n_pre ) % 4;
69
70 chi1 = x1;
71 chi2 = x2;
72 psi1 = y;
73
74 if ( n_pre == 1 )
75 {
76 double alpha1_c = *alpha1;
77 double alpha2_c = *alpha2;
78 double chi11_c = *chi1;
79 double chi12_c = *chi2;
80 double temp1;
81
82 // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
83 temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
84 *psi1 = *psi1 + temp1;
85
86 chi1 += inc_x1;
87 chi2 += inc_x2;
88 psi1 += inc_y;
89 }
90
91 a1v.v = _mm_loaddup_pd( ( double* )alpha1 );
92 a2v.v = _mm_loaddup_pd( ( double* )alpha2 );
93
94 for ( i = 0; i < n_run; ++i )
95 {
96 x11v.v = _mm_load_pd( ( double* )chi1 );
97 x12v.v = _mm_load_pd( ( double* )chi2 );
98 y1v.v = _mm_load_pd( ( double* )psi1 );
99
100 x21v.v = _mm_load_pd( ( double* )(chi1 + 2) );
101 x22v.v = _mm_load_pd( ( double* )(chi2 + 2) );
102 y2v.v = _mm_load_pd( ( double* )(psi1 + 2) );
103
104 y1v.v += a1v.v * x11v.v + a2v.v * x12v.v;
105 y2v.v += a1v.v * x21v.v + a2v.v * x22v.v;
106
107 _mm_store_pd( ( double* )psi1, y1v.v );
108 _mm_store_pd( ( double* )(psi1 + 2), y2v.v );
109
110 //chi1 += step_x1;
111 //chi2 += step_x2;
112 //psi1 += step_y;
113 chi1 += 4;
114 chi2 += 4;
115 psi1 += 4;
116 }
117
118 if ( n_left > 0 )
119 {
120 double alpha1_c = *alpha1;
121 double alpha2_c = *alpha2;
122
123 for ( i = 0; i < n_left; ++i )
124 {
125 double chi11_c = *chi1;
126 double chi12_c = *chi2;
127 double psi1_c = *psi1;
128 double temp1;
129
130 temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
131 *psi1 = psi1_c + temp1;
132
133 chi1 += inc_x1;
134 chi2 += inc_x2;
135 psi1 += inc_y;
136 }
137 }
138}
int n_pre
Definition bl1_axmyv2.c:147
chi1
Definition bl1_axpyv2b.c:156
int n_left
Definition bl1_axpyv2b.c:151
int n_run
Definition bl1_axpyv2b.c:150
double *restrict psi1
Definition bl1_axpyv2b.c:143
int i
Definition bl1_axpyv2b.c:148
double temp1
Definition bl1_axpyv2b.c:146
double *restrict chi2
Definition bl1_axpyv2b.c:142
double alpha1_c
Definition bl1_axpyv2b.c:144
double alpha2_c
Definition bl1_axpyv2b.c:145
double *restrict alpha1
Definition bl1_axpyv2bdotaxpy.c:198
x1
Definition bl1_dotsv2.c:374
double *restrict alpha2
Definition bl1_dotv2axpyv2b.c:188
Definition blis_type_defs.h:117

References alpha1, alpha1_c, alpha2, alpha2_c, bl1_abort(), chi1, chi2, i, n_left, n_pre, n_run, psi1, temp1, v2df_t::v, and x1.

Referenced by FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

◆ bl1_saxpyv2b()

void bl1_saxpyv2b ( int  n,
float * alpha1,
float * alpha2,
float * x1,
int  inc_x1,
float * x2,
int  inc_x2,
float * y,
int  inc_y 
)
26{
27 bl1_abort();
28}

References bl1_abort().

◆ bl1_zaxpyv2b()

void bl1_zaxpyv2b ( int  n,
dcomplex * alpha1,
dcomplex * alpha2,
dcomplex * x1,
int  inc_x1,
dcomplex * x2,
int  inc_x2,
dcomplex * y,
int  inc_y 
)
217{
221 int i;
225 v2df_t t1v, y1v;
227
228 chi1 = x1;
229 chi2 = x2;
230 psi1 = y;
231
232 alpha1v.v = _mm_load_pd( ( double* )alpha1 );
233 alpha2v.v = _mm_load_pd( ( double* )alpha2 );
236
237 if ( inc_x1 == 1 &&
238 inc_x2 == 1 &&
239 inc_y == 1 )
240 {
241 for ( i = 0; i < n; ++i )
242 {
243 x11v.v = _mm_load_pd( ( double* )chi1 );
244 x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
245 x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
246 acbc.v = alpha1v.v * x11v.v;
247 bdad.v = alpha1rv.v * x12v.v;
248 t1v.v = _mm_addsub_pd( acbc.v, bdad.v );
249
250 x11v.v = _mm_load_pd( ( double* )chi2 );
251 x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
252 x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
253 acbc.v = alpha2v.v * x11v.v;
254 bdad.v = alpha2rv.v * x12v.v;
255 t1v.v = t1v.v + _mm_addsub_pd( acbc.v, bdad.v );
256
257 y1v.v = _mm_load_pd( ( double* )psi1 );
258 y1v.v = y1v.v + t1v.v;
259 _mm_store_pd( ( double* )psi1, y1v.v );
260
261 chi1 += 1;
262 chi2 += 1;
263 psi1 += 1;
264 }
265 }
266 else
267 {
268 for ( i = 0; i < n; ++i )
269 {
270 x11v.v = _mm_load_pd( ( double* )chi1 );
271 x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
272 x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
273 acbc.v = alpha1v.v * x11v.v;
274 bdad.v = alpha1rv.v * x12v.v;
275 t1v.v = _mm_addsub_pd( acbc.v, bdad.v );
276
277 x11v.v = _mm_load_pd( ( double* )chi2 );
278 x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
279 x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
280 acbc.v = alpha2v.v * x11v.v;
281 bdad.v = alpha2rv.v * x12v.v;
282 t1v.v = t1v.v + _mm_addsub_pd( acbc.v, bdad.v );
283
284 y1v.v = _mm_load_pd( ( double* )psi1 );
285 y1v.v = y1v.v + t1v.v;
286 _mm_store_pd( ( double* )psi1, y1v.v );
287
288 chi1 += inc_x1;
289 chi2 += inc_x2;
290 psi1 += inc_y;
291 }
292 }
293}
Definition blis_type_defs.h:138
__m128d v
Definition blis_type_defs.h:118

References alpha1, alpha2, chi1, chi2, i, psi1, v2df_t::v, and x1.

Referenced by FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), and FLA_Fused_Uhu_Yhu_Zhu_opz_var1().

◆ for()

for ( i = 0; i < n_run; ++i )
164 {
165 double chi11_c = *chi1;
166 double chi21_c = *(chi1 + inc_x1);
167 double chi12_c = *chi2;
168 double chi22_c = *(chi2 + inc_x2);
169 double psi1_c = *psi1;
170 double psi2_c = *(psi1 + inc_y);
171
172 // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
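173 // psi2 = psi2 + alpha1 * chi21 + alpha2 * chi22;
174 temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
175 temp2 = alpha1_c * chi21_c + alpha2_c * chi22_c;
176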
177 *psi1 = psi1_c + temp1;
178 *(psi1 + inc_y) = psi2_c + temp2;
179
180 chi1 += twoinc_x1;
181 chi2 += twoinc_x2;
182 psi1 += twoinc_y;
183 }
double temp2
Definition bl1_axpyv2b.c:147
int twoinc_x2
Definition bl1_axpyv2b.c:153
int twoinc_y
Definition bl1_axpyv2b.c:154
int twoinc_x1
Definition bl1_axpyv2b.c:152

References alpha1_c, alpha2_c, chi1, chi2, psi1, temp1, temp2, twoinc_x1, twoinc_x2, and twoinc_y.

◆ if()

if ( n_left == 1 )
186 {
187 double chi11_c = *chi1;
188 double chi12_c = *chi2;
189
190 // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
191 temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
192
193 *psi1 = *psi1 + temp1;
194 }

References alpha1_c, alpha2_c, chi1, chi2, psi1, and temp1.

Variable Documentation

◆ alpha1_c

dcomplex alpha1_c = *alpha1

◆ alpha2_c

dcomplex alpha2_c = *alpha2

◆ chi1

chi1 = x1

Referenced by bl1_daxpyv2b(), bl1_zaxpyv2b(), for(), and if().

◆ chi2

◆ i

int i

Referenced by bl1_daxpyv2b(), and bl1_zaxpyv2b().

◆ n_left

int n_left = n % 2

Referenced by bl1_daxpyv2b().

◆ n_run

int n_run = n / 2

Referenced by bl1_daxpyv2b().

◆ psi1

psi1 = y

Referenced by bl1_daxpyv2b(), bl1_zaxpyv2b(), for(), and if().

◆ temp

dcomplex temp

◆ temp1

double temp1

◆ temp2

double temp2

◆ twoinc_x1

int twoinc_x1 = 2*inc_x1

Referenced by for().

◆ twoinc_x2

int twoinc_x2 = 2*inc_x2

Referenced by for().

◆ twoinc_y

int twoinc_y = 2*inc_y