libflame revision_anchor
Functions | Variables
bl1_axpyv3b.c File Reference

(r)

Functions

void bl1_saxpyv3b (int n, float *alpha1, float *alpha2, float *alpha3, float *x1, int inc_x1, float *x2, int inc_x2, float *x3, int inc_x3, float *y, int inc_y)
 
void bl1_daxpyv3b (int n, double *alpha1, double *alpha2, double *alpha3, double *x1, int inc_x1, double *x2, int inc_x2, double *x3, int inc_x3, double *y, int inc_y)
 
 for (i=0;i< n_run;++i)
 
 if (n_left==1)
 
void bl1_caxpyv3b (int n, scomplex *alpha1, scomplex *alpha2, scomplex *alpha3, scomplex *x1, int inc_x1, scomplex *x2, int inc_x2, scomplex *x3, int inc_x3, scomplex *y, int inc_y)
 
void bl1_zaxpyv3b (int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *alpha3, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *x3, int inc_x3, dcomplex *y, int inc_y)
 

Variables

double *restrict chi2
 
double *restrict psi1 = y
 
double alpha1_c = *alpha1
 
double alpha2_c = *alpha2
 
double temp1
 
double temp2
 
int i
 
int n_run = n / 2
 
int n_left = n % 2
 
int twoinc_x1 = 2*inc_x1
 
int twoinc_x2 = 2*inc_x2
 
int twoinc_y = 2*inc_y
 
 chi1 = x1
 

Function Documentation

◆ bl1_caxpyv3b()

void bl1_caxpyv3b ( int  n,
scomplex * alpha1,
scomplex * alpha2,
scomplex * alpha3,
scomplex * x1,
int  inc_x1,
scomplex * x2,
int  inc_x2,
scomplex * x3,
int  inc_x3,
scomplex * y,
int  inc_y 
)
219{
220 bl1_abort();
221}
void bl1_abort(void)
Definition bl1_abort.c:13

References bl1_abort().

◆ bl1_daxpyv3b()

void bl1_daxpyv3b ( int  n,
double * alpha1,
double * alpha2,
double * alpha3,
double * x1,
int  inc_x1,
double * x2,
int  inc_x2,
double * x3,
int  inc_x3,
double * y,
int  inc_y 
)
43{
44 double* restrict chi1;
45 double* restrict chi2;
46 double* restrict chi3;
47 double* restrict psi1;
48 int i;
49
50 int n_pre;
51 int n_run;
52 int n_left;
53
54 v2df_t a1v, a2v, a3v;
57 v2df_t y1v;
58 v2df_t y2v;
59
60 if ( inc_x1 != 1 ||
61 inc_x2 != 1 ||
62 inc_x3 != 1 ||
63 inc_y != 1 ) bl1_abort();
64
65 n_pre = 0;
66 if ( ( unsigned long ) y % 16 != 0 )
67 {
68 if ( ( unsigned long ) x1 % 16 == 0 ||
69 ( unsigned long ) x2 % 16 == 0 ||
70 ( unsigned long ) x3 % 16 == 0 ) bl1_abort();
71
72 n_pre = 1;
73 }
74
75 n_run = ( n - n_pre ) / 4;
76 n_left = ( n - n_pre ) % 4;
77
78 chi1 = x1;
79 chi2 = x2;
80 chi3 = x3;
81 psi1 = y;
82
83 if ( n_pre == 1 )
84 {
85 double alpha1_c = *alpha1;
86 double alpha2_c = *alpha2;
87 double alpha3_c = *alpha3;
88 double chi11_c = *chi1;
89 double chi12_c = *chi2;
90 double chi13_c = *chi3;
91
93
94 chi1 += inc_x1;
95 chi2 += inc_x2;
96 chi3 += inc_x3;
97 psi1 += inc_y;
98 }
99
100 a1v.v = _mm_loaddup_pd( ( double* )alpha1 );
101 a2v.v = _mm_loaddup_pd( ( double* )alpha2 );
102 a3v.v = _mm_loaddup_pd( ( double* )alpha3 );
103
104 for ( i = 0; i < n_run; ++i )
105 {
106 x11v.v = _mm_load_pd( ( double* )chi1 );
107 x12v.v = _mm_load_pd( ( double* )chi2 );
108 x13v.v = _mm_load_pd( ( double* )chi3 );
109 y1v.v = _mm_load_pd( ( double* )psi1 );
110
111 y1v.v += a1v.v * x11v.v + a2v.v * x12v.v + a3v.v * x13v.v;
112
113 _mm_store_pd( ( double* )psi1, y1v.v );
114
115 x21v.v = _mm_load_pd( ( double* )(chi1 + 2) );
116 x22v.v = _mm_load_pd( ( double* )(chi2 + 2) );
117 x23v.v = _mm_load_pd( ( double* )(chi3 + 2) );
118 y2v.v = _mm_load_pd( ( double* )(psi1 + 2) );
119
120 y2v.v += a1v.v * x21v.v + a2v.v * x22v.v + a3v.v * x23v.v;
121
122 _mm_store_pd( ( double* )(psi1 + 2), y2v.v );
123
124 chi1 += 4;
125 chi2 += 4;
126 chi3 += 4;
127 psi1 += 4;
128 }
129
130 if ( n_left > 0 )
131 {
132 double alpha1_c = *alpha1;
133 double alpha2_c = *alpha2;
134 double alpha3_c = *alpha3;
135
136 for ( i = 0; i < n_left; ++i )
137 {
138 double chi11_c = *chi1;
139 double chi12_c = *chi2;
140 double chi13_c = *chi3;
141
143
144 chi1 += inc_x1;
145 chi2 += inc_x2;
146 chi3 += inc_x3;
147 psi1 += inc_y;
148 }
149 }
150}
int n_pre
Definition bl1_axmyv2.c:147
double *restrict alpha1
Definition bl1_axpyv2bdotaxpy.c:198
chi1
Definition bl1_axpyv3b.c:168
int n_left
Definition bl1_axpyv3b.c:163
double *restrict chi2
Definition bl1_axpyv3b.c:154
double alpha1_c
Definition bl1_axpyv3b.c:156
int n_run
Definition bl1_axpyv3b.c:162
double *restrict psi1
Definition bl1_axpyv3b.c:155
int i
Definition bl1_axpyv3b.c:160
double alpha2_c
Definition bl1_axpyv3b.c:157
x1
Definition bl1_dotsv2.c:374
double *restrict alpha2
Definition bl1_dotv2axpyv2b.c:188
v2df_t
Definition blis_type_defs.h:117

References alpha1, alpha1_c, alpha2, alpha2_c, bl1_abort(), chi1, chi2, i, n_left, n_pre, n_run, psi1, v2df_t::v, and x1.

◆ bl1_saxpyv3b()

void bl1_saxpyv3b ( int  n,
float * alpha1,
float * alpha2,
float * alpha3,
float * x1,
int  inc_x1,
float * x2,
int  inc_x2,
float * x3,
int  inc_x3,
float * y,
int  inc_y 
)
29{
30 bl1_abort();
31}

References bl1_abort().

◆ bl1_zaxpyv3b()

void bl1_zaxpyv3b ( int  n,
dcomplex * alpha1,
dcomplex * alpha2,
dcomplex * alpha3,
dcomplex * x1,
int  inc_x1,
dcomplex * x2,
int  inc_x2,
dcomplex * x3,
int  inc_x3,
dcomplex * y,
int  inc_y 
)
232{
233 bl1_abort();
234}

References bl1_abort().

◆ for()

for ( i = 0; i < n_run; ++i )
176 {
177 double chi11_c = *chi1;
178 double chi21_c = *(chi1 + inc_x1);
179 double chi12_c = *chi2;
180 double chi22_c = *(chi2 + inc_x2);
181 double psi1_c = *psi1;
182 double psi2_c = *(psi1 + inc_y);
183
184 // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
185 // psi2 = psi2 + alpha1 * chi21 + alpha2 * chi22;
188
189 *psi1 = psi1_c + temp1;
190 *(psi1 + inc_y) = psi2_c + temp2;
191
192 chi1 += twoinc_x1;
193 chi2 += twoinc_x2;
194 psi1 += twoinc_y;
195 }
double temp2
Definition bl1_axpyv3b.c:159
int twoinc_x2
Definition bl1_axpyv3b.c:165
int twoinc_y
Definition bl1_axpyv3b.c:166
int twoinc_x1
Definition bl1_axpyv3b.c:164
double temp1
Definition bl1_axpyv3b.c:158

References alpha1_c, alpha2_c, chi1, chi2, psi1, temp1, temp2, twoinc_x1, twoinc_x2, and twoinc_y.

◆ if()

if ( n_left == 1 )
198 {
199 double chi11_c = *chi1;
200 double chi12_c = *chi2;
201
202 // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
204
205 *psi1 = *psi1 + temp1;
206 }

References alpha1_c, alpha2_c, chi1, chi2, psi1, and temp1.

Variable Documentation

◆ alpha1_c

alpha1_c = *alpha1

Referenced by bl1_daxpyv3b(), for(), and if().

◆ alpha2_c

alpha2_c = *alpha2

Referenced by bl1_daxpyv3b(), for(), and if().

◆ chi1

chi1 = x1

Referenced by bl1_daxpyv3b(), for(), and if().

◆ chi2

chi2
Initial value:
{
double* restrict chi1

Referenced by bl1_daxpyv3b(), for(), and if().

◆ i

int i

Referenced by bl1_daxpyv3b().

◆ n_left

int n_left = n % 2

Referenced by bl1_daxpyv3b().

◆ n_run

int n_run = n / 2

Referenced by bl1_daxpyv3b().

◆ psi1

psi1 = y

Referenced by bl1_daxpyv3b(), for(), and if().

◆ temp1

double temp1

Referenced by for(), and if().

◆ temp2

double temp2

Referenced by for().

◆ twoinc_x1

int twoinc_x1 = 2*inc_x1

Referenced by for().

◆ twoinc_x2

int twoinc_x2 = 2*inc_x2

Referenced by for().

◆ twoinc_y

int twoinc_y = 2*inc_y

Referenced by for().