libflame revision_anchor
Functions
bl1_axpymt.c File Reference

(r)

Functions

void bl1_saxpymt (trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_daxpymt (trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_caxpymt (trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpymt (trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 

Function Documentation

◆ bl1_caxpymt()

void bl1_caxpymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
150{
151 scomplex* a_begin;
152 scomplex* b_begin;
153 scomplex* a_temp;
154 int inca_temp;
155 int lda, inca;
156 int ldb, incb;
157 int n_iter;
158 int n_elem;
159 int j;
160
161 // Return early if possible.
162 if ( bl1_zero_dim2( m, n ) ) return;
163
164 // Handle cases where A and B are vectors to ensure that the underlying axpy
165 // gets invoked only once.
166 if ( bl1_is_vector( m, n ) )
167 {
168 // Initialize with values appropriate for vectors.
169 n_iter = 1;
170 n_elem = bl1_vector_dim( m, n );
171 lda = 1; // multiplied by zero when n_iter == 1; not needed.
172 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
173 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
174 incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
175 }
176 else // matrix case
177 {
178 // Initialize with optimal values for column-major storage.
179 n_iter = n;
180 n_elem = m;
181 lda = a_cs;
182 inca = a_rs;
183 ldb = b_cs;
184 incb = b_rs;
185
186 // Handle the transposition of A.
187 if ( bl1_does_trans( trans ) )
188 {
189 bl1_swap_ints( lda, inca );
190 }
191
192 // An optimization: if B is row-major and if A is effectively row-major
193 // after a possible transposition, then let's access the matrices by rows
194 // instead of by columns for increased spatial locality.
195 if ( bl1_is_row_storage( b_rs, b_cs ) )
196 {
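197 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
198 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
199 {
200 bl1_swap_ints( n_iter, n_elem );
201 bl1_swap_ints( lda, inca );
202 bl1_swap_ints( ldb, incb );
203 }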
204 }
205 }
206
207 if ( bl1_does_conj( trans ) )
208 {
209 conj1_t conj = bl1_proj_trans1_to_conj( trans );
210
211 a_temp = bl1_callocv( n_elem );
212 inca_temp = 1;
213
214 for ( j = 0; j < n_iter; j++ )
215 {
216 a_begin = a + j*lda;
217 b_begin = b + j*ldb;
218
219 bl1_ccopyv( conj,
220 n_elem,
221 a_begin, inca,
222 a_temp, inca_temp );
223
224 bl1_caxpy( n_elem,
225 alpha,
226 a_temp, inca_temp,
227 b_begin, incb );
228 }
229
230 bl1_cfree( a_temp );
231 }
232 else // if ( !bl1_does_conj( trans ) )
233 {
234 for ( j = 0; j < n_iter; j++ )
235 {
236 a_begin = a + j*lda;
237 b_begin = b + j*ldb;
238
239 bl1_caxpy( n_elem,
240 alpha,
241 a_begin, inca,
242 b_begin, incb );
243 }
244
245 }
246}
int i
Definition bl1_axmyv2.c:145
void bl1_caxpy(int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpy.c:43
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
int bl1_does_notrans(trans1_t trans)
Definition bl1_does.c:19
int bl1_does_conj(trans1_t trans)
Definition bl1_does.c:25
int bl1_is_row_storage(int rs, int cs)
Definition bl1_is.c:95
int bl1_is_vector(int m, int n)
Definition bl1_is.c:106
int bl1_is_col_storage(int rs, int cs)
Definition bl1_is.c:90
conj1_t bl1_proj_trans1_to_conj(trans1_t trans)
Definition bl1_proj.c:13
int bl1_vector_dim(int m, int n)
Definition bl1_vector.c:13
int bl1_vector_inc(trans1_t trans, int m, int n, int rs, int cs)
Definition bl1_vector.c:19
int bl1_zero_dim2(int m, int n)
Definition bl1_is.c:118
int bl1_does_trans(trans1_t trans)
Definition bl1_does.c:13
void bl1_cfree(scomplex *p)
Definition bl1_free.c:40
scomplex * bl1_callocv(unsigned int n_elem)
Definition bl1_allocv.c:40
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
conj1_t
Definition blis_type_defs.h:80
Definition blis_type_defs.h:133

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_daxpymt()

void bl1_daxpymt ( trans1_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
82{
83 double* a_begin;
84 double* b_begin;
85 int lda, inca;
86 int ldb, incb;
87 int n_iter;
88 int n_elem;
89 int j;
90
91 // Return early if possible.
92 if ( bl1_zero_dim2( m, n ) ) return;
93
94 // Handle cases where A and B are vectors to ensure that the underlying axpy
95 // gets invoked only once.
96 if ( bl1_is_vector( m, n ) )
97 {
98 // Initialize with values appropriate for vectors.
99 n_iter = 1;
100 n_elem = bl1_vector_dim( m, n );
101 lda = 1; // multiplied by zero when n_iter == 1; not needed.
102 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
103 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
104 incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
105 }
106 else // matrix case
107 {
108 // Initialize with optimal values for column-major storage.
109 n_iter = n;
110 n_elem = m;
111 lda = a_cs;
112 inca = a_rs;
113 ldb = b_cs;
114 incb = b_rs;
115
116 // Handle the transposition of A.
117 if ( bl1_does_trans( trans ) )
118 {
119 bl1_swap_ints( lda, inca );
120 }
121
122 // An optimization: if B is row-major and if A is effectively row-major
123 // after a possible transposition, then let's access the matrices by rows
124 // instead of by columns for increased spatial locality.
125 if ( bl1_is_row_storage( b_rs, b_cs ) )
126 {
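127 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
128 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
129 {
130 bl1_swap_ints( n_iter, n_elem );
131 bl1_swap_ints( lda, inca );
132 bl1_swap_ints( ldb, incb );
133 }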
134 }
135 }
136
137 for ( j = 0; j < n_iter; j++ )
138 {
139 a_begin = a + j*lda;
140 b_begin = b + j*ldb;
141
142 bl1_daxpy( n_elem,
143 alpha,
144 a_begin, inca,
145 b_begin, incb );
146 }
147}
void bl1_daxpy(int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpy.c:28

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_saxpymt()

void bl1_saxpymt ( trans1_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
14{
15 float* a_begin;
16 float* b_begin;
17 int lda, inca;
18 int ldb, incb;
19 int n_iter;
20 int n_elem;
21 int j;
22
23 // Return early if possible.
24 if ( bl1_zero_dim2( m, n ) ) return;
25
26 // Handle cases where A and B are vectors to ensure that the underlying axpy
27 // gets invoked only once.
28 if ( bl1_is_vector( m, n ) )
29 {
30 // Initialize with values appropriate for vectors.
31 n_iter = 1;
32 n_elem = bl1_vector_dim( m, n );
33 lda = 1; // multiplied by zero when n_iter == 1; not needed.
34 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
36 incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
37 }
38 else // matrix case
39 {
40 // Initialize with optimal values for column-major storage.
41 n_iter = n;
42 n_elem = m;
43 lda = a_cs;
44 inca = a_rs;
45 ldb = b_cs;
46 incb = b_rs;
47
48 // Handle the transposition of A.
49 if ( bl1_does_trans( trans ) )
50 {
51 bl1_swap_ints( lda, inca );
52 }
53
54 // An optimization: if B is row-major and if A is effectively row-major
55 // after a possible transposition, then let's access the matrices by rows
56 // instead of by columns for increased spatial locality.
57 if ( bl1_is_row_storage( b_rs, b_cs ) )
58 {
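59 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
60 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
61 {
62 bl1_swap_ints( n_iter, n_elem );
63 bl1_swap_ints( lda, inca );
64 bl1_swap_ints( ldb, incb );
65 }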
66 }
67 }
68
69 for ( j = 0; j < n_iter; j++ )
70 {
71 a_begin = a + j*lda;
72 b_begin = b + j*ldb;
73
74 bl1_saxpy( n_elem,
75 alpha,
76 a_begin, inca,
77 b_begin, incb );
78 }
79}
void bl1_saxpy(int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpy.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_zaxpymt()

void bl1_zaxpymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
249{
250 dcomplex* a_begin;
251 dcomplex* b_begin;
252 dcomplex* a_temp;
253 int inca_temp;
254 int lda, inca;
255 int ldb, incb;
256 int n_iter;
257 int n_elem;
258 int j;
259
260 // Return early if possible.
261 if ( bl1_zero_dim2( m, n ) ) return;
262
263 // Handle cases where A and B are vectors to ensure that the underlying axpy
264 // gets invoked only once.
265 if ( bl1_is_vector( m, n ) )
266 {
267 // Initialize with values appropriate for vectors.
268 n_iter = 1;
269 n_elem = bl1_vector_dim( m, n );
270 lda = 1; // multiplied by zero when n_iter == 1; not needed.
271 inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
272 ldb = 1; // multiplied by zero when n_iter == 1; not needed.
273 incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
274 }
275 else // matrix case
276 {
277 // Initialize with optimal values for column-major storage.
278 n_iter = n;
279 n_elem = m;
280 lda = a_cs;
281 inca = a_rs;
282 ldb = b_cs;
283 incb = b_rs;
284
285 // Handle the transposition of A.
286 if ( bl1_does_trans( trans ) )
287 {
288 bl1_swap_ints( lda, inca );
289 }
290
291 // An optimization: if B is row-major and if A is effectively row-major
292 // after a possible transposition, then let's access the matrices by rows
293 // instead of by columns for increased spatial locality.
294 if ( bl1_is_row_storage( b_rs, b_cs ) )
295 {
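296 if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
297 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
298 {
299 bl1_swap_ints( n_iter, n_elem );
300 bl1_swap_ints( lda, inca );
301 bl1_swap_ints( ldb, incb );
302 }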
303 }
304 }
305
306 if ( bl1_does_conj( trans ) )
307 {
308 conj1_t conj = bl1_proj_trans1_to_conj( trans );
309
310 a_temp = bl1_zallocv( n_elem );
311 inca_temp = 1;
312
313 for ( j = 0; j < n_iter; j++ )
314 {
315 a_begin = a + j*lda;
316 b_begin = b + j*ldb;
317
318 bl1_zcopyv( conj,
319 n_elem,
320 a_begin, inca,
321 a_temp, inca_temp );
322
323 bl1_zaxpy( n_elem,
324 alpha,
325 a_temp, inca_temp,
326 b_begin, incb );
327 }
328
329 bl1_zfree( a_temp );
330 }
331 else // if ( !bl1_does_conj( trans ) )
332 {
333 for ( j = 0; j < n_iter; j++ )
334 {
335 a_begin = a + j*lda;
336 b_begin = b + j*ldb;
337
338 bl1_zaxpy( n_elem,
339 alpha,
340 a_begin, inca,
341 b_begin, incb );
342 }
343
344 }
345}
void bl1_zaxpy(int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpy.c:58
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
dcomplex * bl1_zallocv(unsigned int n_elem)
Definition bl1_allocv.c:45
void bl1_zfree(dcomplex *p)
Definition bl1_free.c:45
Definition blis_type_defs.h:138

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().