libflame revision_anchor
Functions
bl1_axpysmt.c File Reference

(r)

Functions

void bl1_saxpysmt (trans1_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs)
 
void bl1_daxpysmt (trans1_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs)
 
void bl1_caxpysmt (trans1_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpysmt (trans1_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs)
 

Function Documentation

◆ bl1_caxpysmt()

void bl1_caxpysmt ( trans1_t  trans,
int  m,
int  n,
scomplex * alpha0,
scomplex * alpha1,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * beta,
scomplex * b,
int  b_rs,
int  b_cs 
)
164{
169 int inca_temp;
170 int lda, inca;
171 int ldb, incb;
172 int n_iter;
173 int n_elem;
174 int j;
175
176 // Return early if possible.
177 if ( bl1_zero_dim2( m, n ) ) return;
178
179 alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
180 alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
181
182 // Handle cases where A and B are vectors to ensure that the underlying axpy
183 // gets invoked only once.
184 if ( bl1_is_vector( m, n ) )
185 {
186 // Initialize with values appropriate for vectors.
187 n_iter = 1;
188 n_elem = bl1_vector_dim( m, n );
189 lda = 1; // multiplied by zero when n_iter == 1; not needed.
190 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
191 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
193 }
194 else // matrix case
195 {
196 // Initialize with optimal values for column-major storage.
197 n_iter = n;
198 n_elem = m;
199 lda = a_cs;
200 inca = a_rs;
201 ldb = b_cs;
202 incb = b_rs;
203
204 // Handle the transposition of A.
205 if ( bl1_does_trans( trans ) )
206 {
208 }
209
210 // An optimization: if B is row-major and if A is effectively row-major
211 // after a possible transposition, then let's access the matrices by rows
212 // instead of by columns for increased spatial locality.
213 if ( bl1_is_row_storage( b_rs, b_cs ) )
214 {
215 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
217 {
221 }
222 }
223 }
224
225 if ( bl1_does_conj( trans ) )
226 {
228
230 inca_temp = 1;
231
232 for ( j = 0; j < n_iter; j++ )
233 {
234 a_begin = a + j*lda;
235 b_begin = b + j*ldb;
236
238 n_elem,
239 a_begin, inca,
240 a_temp, inca_temp );
241
243 beta,
244 b_begin, incb );
245
247 &alpha_prod,
249 b_begin, incb );
250 }
251
252 bl1_cfree( a_temp );
253 }
254 else // if ( !bl1_does_conj( trans ) )
255 {
256 for ( j = 0; j < n_iter; j++ )
257 {
258 a_begin = a + j*lda;
259 b_begin = b + j*ldb;
260
262 beta,
263 b_begin, incb );
264
266 &alpha_prod,
267 a_begin, inca,
268 b_begin, incb );
269 }
270 }
271}
int i
Definition bl1_axmyv2.c:145
void bl1_caxpy(int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpy.c:43
double *restrict alpha1
Definition bl1_axpyv2bdotaxpy.c:198
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cscal(int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scal.c:52
int bl1_does_notrans(trans1_t trans)
Definition bl1_does.c:19
int bl1_does_conj(trans1_t trans)
Definition bl1_does.c:25
int bl1_is_row_storage(int rs, int cs)
Definition bl1_is.c:95
int bl1_is_vector(int m, int n)
Definition bl1_is.c:106
int bl1_is_col_storage(int rs, int cs)
Definition bl1_is.c:90
conj1_t bl1_proj_trans1_to_conj(trans1_t trans)
Definition bl1_proj.c:13
int bl1_vector_dim(int m, int n)
Definition bl1_vector.c:13
int bl1_vector_inc(trans1_t trans, int m, int n, int rs, int cs)
Definition bl1_vector.c:19
int bl1_zero_dim2(int m, int n)
Definition bl1_is.c:118
int bl1_does_trans(trans1_t trans)
Definition bl1_does.c:13
void bl1_cfree(scomplex *p)
Definition bl1_free.c:40
scomplex * bl1_callocv(unsigned int n_elem)
Definition bl1_allocv.c:40
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
conj1_t
Definition blis_type_defs.h:80
Definition blis_type_defs.h:133
float real
Definition blis_type_defs.h:134

References alpha1, bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_cscal(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Axpys_external().

◆ bl1_daxpysmt()

void bl1_daxpysmt ( trans1_t  trans,
int  m,
int  n,
double * alpha0,
double * alpha1,
double * a,
int  a_rs,
int  a_cs,
double * beta,
double * b,
int  b_rs,
int  b_cs 
)
89{
90 double* a_begin;
91 double* b_begin;
92 double alpha_prod;
93 int lda, inca;
94 int ldb, incb;
95 int n_iter;
96 int n_elem;
97 int j;
98
99 // Return early if possible.
100 if ( bl1_zero_dim2( m, n ) ) return;
101
102 alpha_prod = (*alpha0) * (*alpha1);
103
104 // Handle cases where A and B are vectors to ensure that the underlying axpy
105 // gets invoked only once.
106 if ( bl1_is_vector( m, n ) )
107 {
108 // Initialize with values appropriate for vectors.
109 n_iter = 1;
110 n_elem = bl1_vector_dim( m, n );
111 lda = 1; // multiplied by zero when n_iter == 1; not needed.
112 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
113 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
115 }
116 else // matrix case
117 {
118 // Initialize with optimal values for column-major storage.
119 n_iter = n;
120 n_elem = m;
121 lda = a_cs;
122 inca = a_rs;
123 ldb = b_cs;
124 incb = b_rs;
125
126 // Handle the transposition of A.
127 if ( bl1_does_trans( trans ) )
128 {
130 }
131
132 // An optimization: if B is row-major and if A is effectively row-major
133 // after a possible transposition, then let's access the matrices by rows
134 // instead of by columns for increased spatial locality.
135 if ( bl1_is_row_storage( b_rs, b_cs ) )
136 {
137 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
139 {
143 }
144 }
145 }
146
147 for ( j = 0; j < n_iter; j++ )
148 {
149 a_begin = a + j*lda;
150 b_begin = b + j*ldb;
151
153 beta,
154 b_begin, incb );
155
157 &alpha_prod,
158 a_begin, inca,
159 b_begin, incb );
160 }
161}
void bl1_daxpy(int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpy.c:28
void bl1_dscal(int n, double *alpha, double *x, int incx)
Definition bl1_scal.c:26

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_dscal(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_saxpysmt()

void bl1_saxpysmt ( trans1_t  trans,
int  m,
int  n,
float * alpha0,
float * alpha1,
float * a,
int  a_rs,
int  a_cs,
float * beta,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 float alpha_prod;
18 int lda, inca;
19 int ldb, incb;
20 int n_iter;
21 int n_elem;
22 int j;
23
24 // Return early if possible.
25 if ( bl1_zero_dim2( m, n ) ) return;
26
27 alpha_prod = (*alpha0) * (*alpha1);
28
29 // Handle cases where A and B are vectors to ensure that the underlying axpy
30 // gets invoked only once.
31 if ( bl1_is_vector( m, n ) )
32 {
33 // Initialize with values appropriate for vectors.
34 n_iter = 1;
35 n_elem = bl1_vector_dim( m, n );
36 lda = 1; // multiplied by zero when n_iter == 1; not needed.
37 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
38 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
40 }
41 else // matrix case
42 {
43 // Initialize with optimal values for column-major storage.
44 n_iter = n;
45 n_elem = m;
46 lda = a_cs;
47 inca = a_rs;
48 ldb = b_cs;
49 incb = b_rs;
50
51 // Handle the transposition of A.
52 if ( bl1_does_trans( trans ) )
53 {
55 }
56
57 // An optimization: if B is row-major and if A is effectively row-major
58 // after a possible transposition, then let's access the matrices by rows
59 // instead of by columns for increased spatial locality.
60 if ( bl1_is_row_storage( b_rs, b_cs ) )
61 {
62 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
64 {
68 }
69 }
70 }
71
72 for ( j = 0; j < n_iter; j++ )
73 {
74 a_begin = a + j*lda;
75 b_begin = b + j*ldb;
76
78 beta,
79 b_begin, incb );
80
83 a_begin, inca,
84 b_begin, incb );
85 }
86}
void bl1_saxpy(int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpy.c:13
void bl1_sscal(int n, float *alpha, float *x, int incx)
Definition bl1_scal.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_zaxpysmt()

void bl1_zaxpysmt ( trans1_t  trans,
int  m,
int  n,
dcomplex * alpha0,
dcomplex * alpha1,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * beta,
dcomplex * b,
int  b_rs,
int  b_cs 
)
274{
279 int inca_temp;
280 int lda, inca;
281 int ldb, incb;
282 int n_iter;
283 int n_elem;
284 int j;
285
286 // Return early if possible.
287 if ( bl1_zero_dim2( m, n ) ) return;
288
289 alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
290 alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
291
292 // Handle cases where A and B are vectors to ensure that the underlying axpy
293 // gets invoked only once.
294 if ( bl1_is_vector( m, n ) )
295 {
296 // Initialize with values appropriate for vectors.
297 n_iter = 1;
298 n_elem = bl1_vector_dim( m, n );
299 lda = 1; // multiplied by zero when n_iter == 1; not needed.
300 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
301 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
303 }
304 else // matrix case
305 {
306 // Initialize with optimal values for column-major storage.
307 n_iter = n;
308 n_elem = m;
309 lda = a_cs;
310 inca = a_rs;
311 ldb = b_cs;
312 incb = b_rs;
313
314 // Handle the transposition of A.
315 if ( bl1_does_trans( trans ) )
316 {
318 }
319
320 // An optimization: if B is row-major and if A is effectively row-major
321 // after a possible transposition, then let's access the matrices by rows
322 // instead of by columns for increased spatial locality.
323 if ( bl1_is_row_storage( b_rs, b_cs ) )
324 {
325 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
327 {
331 }
332 }
333 }
334
335 if ( bl1_does_conj( trans ) )
336 {
338
340 inca_temp = 1;
341
342 for ( j = 0; j < n_iter; j++ )
343 {
344 a_begin = a + j*lda;
345 b_begin = b + j*ldb;
346
348 n_elem,
349 a_begin, inca,
350 a_temp, inca_temp );
351
353 beta,
354 b_begin, incb );
355
357 &alpha_prod,
359 b_begin, incb );
360 }
361
362 bl1_zfree( a_temp );
363 }
364 else // if ( !bl1_does_conj( trans ) )
365 {
366 for ( j = 0; j < n_iter; j++ )
367 {
368 a_begin = a + j*lda;
369 b_begin = b + j*ldb;
370
372 beta,
373 b_begin, incb );
374
376 &alpha_prod,
377 a_begin, inca,
378 b_begin, incb );
379 }
380 }
381}
void bl1_zaxpy(int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpy.c:58
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zscal(int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scal.c:78
dcomplex * bl1_zallocv(unsigned int n_elem)
Definition bl1_allocv.c:45
void bl1_zfree(dcomplex *p)
Definition bl1_free.c:45
Definition blis_type_defs.h:138
double real
Definition blis_type_defs.h:139

References alpha1, bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), bl1_zscal(), BLIS1_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Axpys_external().