libflame revision_anchor
Functions
bl1_herk.c File Reference

(r)

Functions

void bl1_sherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_cherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
 
void bl1_zherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
 

Function Documentation

◆ bl1_cherk()

void bl1_cherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
scomplex * a,
int  a_rs,
int  a_cs,
float * beta,
scomplex * c,
int  c_rs,
int  c_cs 
)
37{
38 uplo1_t uplo_save = uplo;
39 int m_save = m;
40 scomplex* a_save = a;
41 scomplex* c_save = c;
42 int a_rs_save = a_rs;
43 int a_cs_save = a_cs;
44 int c_rs_save = c_rs;
45 int c_cs_save = c_cs;
46 float zero_r = bl1_s0();
47 scomplex one = bl1_c1();
49 int lda, inca;
50 int ldc, incc;
51 int ldc_conj, incc_conj;
53
54 // Return early if possible.
55 if ( bl1_zero_dim2( m, k ) ) return;
56
57 // If necessary, allocate, initialize, and use a temporary contiguous
58 // copy of each matrix rather than the original matrices.
60 m,
61 k,
63 &a, &a_rs, &a_cs );
64
66 m,
67 m,
69 &c, &c_rs, &c_cs );
70
71 // Initialize with values assuming column-major storage.
72 lda = a_cs;
73 inca = a_rs;
74 ldc = c_cs;
75 incc = c_rs;
76
77 // Adjust the parameters based on the storage of each matrix.
78 if ( bl1_is_col_storage( c_rs, c_cs ) )
79 {
80 if ( bl1_is_col_storage( a_rs, a_cs ) )
81 {
82 // requested operation: uplo( C_c ) += A_c * A_c'
83 // effective operation: uplo( C_c ) += A_c * A_c'
84 }
85 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
86 {
87 // requested operation: uplo( C_c ) += A_r * A_r'
88 // effective operation: uplo( C_c ) += conj( A_c' * A_c )
90
92
94 }
95 }
96 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
97 {
98 if ( bl1_is_col_storage( a_rs, a_cs ) )
99 {
100 // requested operation: uplo( C_r ) += A_c * A_c'
101 // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
103
104 bl1_toggle_uplo( uplo );
105
107 }
108 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
109 {
110 // requested operation: uplo( C_r ) += A_r * A_r'
111 // effective operation: ~uplo( C_c ) += A_c' * A_c
114
115 bl1_toggle_uplo( uplo );
117 }
118 }
119
120 // There are two cases where we need to perform the rank-k product and
121 // then axpy the result into C with a conjugation. We handle those two
122 // cases here.
123 if ( herk_needs_conj )
124 {
125 // We need a temporary matrix for holding the rank-k product.
126 c_conj = bl1_callocm( m, m );
127 ldc_conj = m;
128 incc_conj = 1;
129
130 // Compute the rank-k product.
131 bl1_cherk_blas( uplo,
132 trans,
133 m,
134 k,
135 alpha,
136 a, lda,
137 &zero_r,
138 c_conj, ldc_conj );
139
140 // Scale C by beta.
141 bl1_csscalmr( uplo,
142 m,
143 m,
144 beta,
145 c, incc, ldc );
146
147 // And finally, accumulate the rank-k product in C_conj into C
148 // with a conjugation.
149 bl1_caxpymrt( uplo,
151 m,
152 m,
153 &one,
155 c, incc, ldc );
156
157 // Free the temporary matrix for C.
158 bl1_cfree( c_conj );
159 }
160 else
161 {
162 bl1_cherk_blas( uplo,
163 trans,
164 m,
165 k,
166 alpha,
167 a, lda,
168 beta,
169 c, ldc );
170 }
171
172 // Free any temporary contiguous matrices, copying the result back to
173 // the original matrix.
175 &a, &a_rs, &a_cs );
176
178 m_save,
179 m_save,
181 &c, &c_rs, &c_cs );
182}
int i
Definition bl1_axmyv2.c:145
void bl1_caxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_axpymrt.c:227
void bl1_cherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
Definition bl1_herk.c:334
void bl1_csscalmr(uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
Definition bl1_scalmr.c:125
int bl1_is_col_storage(int rs, int cs)
Definition bl1_is.c:90
int bl1_zero_dim2(int m, int n)
Definition bl1_is.c:118
float bl1_s0(void)
Definition bl1_constants.c:111
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:45
scomplex bl1_c1(void)
Definition bl1_constants.c:61
void bl1_cfree(scomplex *p)
Definition bl1_free.c:40
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigmr.c:59
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:40
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:77
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmt.c:89
uplo1_t
Definition blis_type_defs.h:61
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
Definition blis_type_defs.h:133

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cherk_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().

◆ bl1_cherk_blas()

void bl1_cherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
scomplex * a,
int  lda,
float * beta,
scomplex * c,
int  ldc 
)
335{
336#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
340
343
347 m,
348 k,
349 *alpha,
350 a, lda,
351 *beta,
352 c, ldc );
353#else
354 char blas_uplo;
355 char blas_trans;
356
359
361 &blas_trans,
362 &m,
363 &k,
364 alpha,
365 a, &lda,
366 beta,
367 c, &ldc );
368#endif
369}
void F77_cherk(char *uplo, char *transa, int *n, int *k, float *alpha, scomplex *a, int *lda, float *beta, scomplex *c, int *ldc)
CBLAS_ORDER
Definition blis_prototypes_cblas.h:17
@ CblasColMajor
Definition blis_prototypes_cblas.h:17
CBLAS_UPLO
Definition blis_prototypes_cblas.h:19
CBLAS_TRANSPOSE
Definition blis_prototypes_cblas.h:18
void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition bl1_param_map.c:15
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition bl1_param_map.c:47

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().

Referenced by bl1_cherk().

◆ bl1_dherk()

void bl1_dherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
25{
26 bl1_dsyrk( uplo,
27 trans,
28 m,
29 k,
30 alpha,
31 a, a_rs, a_cs,
32 beta,
33 c, c_rs, c_cs );
34}
void bl1_dsyrk(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_syrk.c:109

References bl1_dsyrk().

◆ bl1_sherk()

void bl1_sherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 bl1_ssyrk( uplo,
16 trans,
17 m,
18 k,
19 alpha,
20 a, a_rs, a_cs,
21 beta,
22 c, c_rs, c_cs );
23}
void bl1_ssyrk(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
Definition bl1_syrk.c:13

References bl1_ssyrk().

◆ bl1_zherk()

void bl1_zherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
double * beta,
dcomplex * c,
int  c_rs,
int  c_cs 
)
185{
186 uplo1_t uplo_save = uplo;
187 int m_save = m;
188 dcomplex* a_save = a;
189 dcomplex* c_save = c;
190 int a_rs_save = a_rs;
191 int a_cs_save = a_cs;
192 int c_rs_save = c_rs;
193 int c_cs_save = c_cs;
194 double zero_r = bl1_d0();
195 dcomplex one = bl1_z1();
197 int lda, inca;
198 int ldc, incc;
199 int ldc_conj, incc_conj;
201
202 // Return early if possible.
203 if ( bl1_zero_dim2( m, k ) ) return;
204
205 // If necessary, allocate, initialize, and use a temporary contiguous
206 // copy of each matrix rather than the original matrices.
208 m,
209 k,
211 &a, &a_rs, &a_cs );
212
214 m,
215 m,
217 &c, &c_rs, &c_cs );
218
219 // Initialize with values assuming column-major storage.
220 lda = a_cs;
221 inca = a_rs;
222 ldc = c_cs;
223 incc = c_rs;
224
225 // Adjust the parameters based on the storage of each matrix.
226 if ( bl1_is_col_storage( c_rs, c_cs ) )
227 {
228 if ( bl1_is_col_storage( a_rs, a_cs ) )
229 {
230 // requested operation: uplo( C_c ) += A_c * A_c'
231 // effective operation: uplo( C_c ) += A_c * A_c'
232 }
233 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
234 {
235 // requested operation: uplo( C_c ) += A_r * A_r'
236 // effective operation: uplo( C_c ) += conj( A_c' * A_c )
238
240
242 }
243 }
244 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
245 {
246 if ( bl1_is_col_storage( a_rs, a_cs ) )
247 {
248 // requested operation: uplo( C_r ) += A_c * A_c'
249 // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
251
252 bl1_toggle_uplo( uplo );
253
255 }
256 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
257 {
258 // requested operation: uplo( C_r ) += A_r * A_r'
259 // effective operation: ~uplo( C_c ) += A_c' * A_c
262
263 bl1_toggle_uplo( uplo );
265 }
266 }
267
268 // There are two cases where we need to perform the rank-k product and
269 // then axpy the result into C with a conjugation. We handle those two
270 // cases here.
271 if ( herk_needs_conj )
272 {
273 // We need a temporary matrix for holding the rank-k product.
274 c_conj = bl1_zallocm( m, m );
275 ldc_conj = m;
276 incc_conj = 1;
277
278 // Compute the rank-k product.
279 bl1_zherk_blas( uplo,
280 trans,
281 m,
282 k,
283 alpha,
284 a, lda,
285 &zero_r,
286 c_conj, ldc_conj );
287
288 // Scale C by beta.
289 bl1_zdscalmr( uplo,
290 m,
291 m,
292 beta,
293 c, incc, ldc );
294
295 // And finally, accumulate the rank-k product in C_conj into C
296 // with a conjugation.
297 bl1_zaxpymrt( uplo,
299 m,
300 m,
301 &one,
303 c, incc, ldc );
304
305 // Free the temporary matrix for C.
306 bl1_zfree( c_conj );
307 }
308 else
309 {
310 bl1_zherk_blas( uplo,
311 trans,
312 m,
313 k,
314 alpha,
315 a, lda,
316 beta,
317 c, ldc );
318 }
319
320 // Free any temporary contiguous matrices, copying the result back to
321 // the original matrix.
323 &a, &a_rs, &a_cs );
324
326 m_save,
327 m_save,
329 &c, &c_rs, &c_cs );
330}
void bl1_zaxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_axpymrt.c:334
void bl1_zherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
Definition bl1_herk.c:371
void bl1_zdscalmr(uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
Definition bl1_scalmr.c:237
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigmr.c:82
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmt.c:127
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:109
double bl1_d0(void)
Definition bl1_constants.c:118
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:45
void bl1_zfree(dcomplex *p)
Definition bl1_free.c:45
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:61
Definition blis_type_defs.h:138

References bl1_d0(), bl1_is_col_storage(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zherk_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().

◆ bl1_zherk_blas()

void bl1_zherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
dcomplex * a,
int  lda,
double * beta,
dcomplex * c,
int  ldc 
)
372{
373#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
377
380
384 m,
385 k,
386 *alpha,
387 a, lda,
388 *beta,
389 c, ldc );
390#else
391 char blas_uplo;
392 char blas_trans;
393
396
398 &blas_trans,
399 &m,
400 &k,
401 alpha,
402 a, &lda,
403 beta,
404 c, &ldc );
405#endif
406}
void F77_zherk(char *uplo, char *transa, int *n, int *k, double *alpha, dcomplex *a, int *lda, double *beta, dcomplex *c, int *ldc)
void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().

Referenced by bl1_zherk().