libflame revision_anchor
Functions
bl1_symm.c File Reference

(r)

Functions

void bl1_ssymm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsymm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csymm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsymm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssymm_blas (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dsymm_blas (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_csymm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsymm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 

Function Documentation

◆ bl1_csymm()

void bl1_csymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *alpha,
scomplex *a,
int  a_rs,
int  a_cs,
scomplex *b,
int  b_rs,
int  b_cs,
scomplex *beta,
scomplex *c,
int  c_rs,
int  c_cs 
)
536{
537 int m_save = m;
538 int n_save = n;
539 scomplex* a_save = a;
540 scomplex* b_save = b;
541 scomplex* c_save = c;
542 int a_rs_save = a_rs;
543 int a_cs_save = a_cs;
544 int b_rs_save = b_rs;
545 int b_cs_save = b_cs;
546 int c_rs_save = c_rs;
547 int c_cs_save = c_cs;
548 scomplex zero = bl1_c0();
549 scomplex one = bl1_c1();
552 int dim_a;
553 int lda, inca;
554 int ldb, incb;
555 int ldc, incc;
556 int ldb_copy, incb_copy;
561
562 // Return early if possible.
563 if ( bl1_zero_dim2( m, n ) ) return;
564
565 // If necessary, allocate, initialize, and use a temporary contiguous
566 // copy of each matrix rather than the original matrices.
569 dim_a,
570 dim_a,
572 &a, &a_rs, &a_cs );
573
575 n,
577 &b, &b_rs, &b_cs );
578
580 n,
582 &c, &c_rs, &c_cs );
583
584 // Initialize with values assuming column-major storage.
585 lda = a_cs;
586 inca = a_rs;
587 ldb = b_cs;
588 incb = b_rs;
589 ldc = c_cs;
590 incc = c_rs;
591
592 // Adjust the parameters based on the storage of each matrix.
593 if ( bl1_is_col_storage( c_rs, c_cs ) )
594 {
595 if ( bl1_is_col_storage( a_rs, a_cs ) )
596 {
597 if ( bl1_is_col_storage( b_rs, b_cs ) )
598 {
599 // requested operation: C_c += uplo( A_c ) * B_c
600 // effective operation: C_c += uplo( A_c ) * B_c
601 }
602 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
603 {
604 // requested operation: C_c += uplo( A_c ) * B_r
605 // effective operation: C_c += uplo( A_c ) * B_c
607 }
608 }
609 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
610 {
611 if ( bl1_is_col_storage( b_rs, b_cs ) )
612 {
613 // requested operation: C_c += uplo( A_r ) * B_c
614 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
616
617 bl1_toggle_uplo( uplo );
618 }
619 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
620 {
621 // requested operation: C_c += uplo( A_r ) * B_r
622 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
625
627 bl1_toggle_uplo( uplo );
628
630 }
631 }
632 }
633 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
634 {
635 if ( bl1_is_col_storage( a_rs, a_cs ) )
636 {
637 if ( bl1_is_col_storage( b_rs, b_cs ) )
638 {
639 // requested operation: C_r += uplo( A_c ) * B_c
640 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
642
643 bl1_swap_ints( m, n );
644
646 }
647 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
648 {
649 // requested operation: C_r += uplo( A_c ) * B_r
650 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
653
654 bl1_swap_ints( m, n );
655
657 }
658 }
659 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
660 {
661 if ( bl1_is_col_storage( b_rs, b_cs ) )
662 {
663 // requested operation: C_r += uplo( A_r ) * B_c
664 // effective operation: C_c += B_c^T * ~uplo( A_c )
667
668 bl1_swap_ints( m, n );
669
671 bl1_toggle_uplo( uplo );
672
675 }
676 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
677 {
678 // requested operation: C_r += uplo( A_r ) * B_r
679 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
683
684 bl1_swap_ints( m, n );
685
686 bl1_toggle_uplo( uplo );
688 }
689 }
690 }
691
692 // We need a temporary matrix for the cases where B needs to be copied.
693 b_copy = b;
694 ldb_copy = ldb;
695 incb_copy = incb;
696
697 // There are two cases where we need to make a copy of B: one where the
698 // copy's dimensions are transposed from the original B, and one where
699 // the dimensions are not swapped.
700 if ( symm_needs_copyb )
701 {
703
704 // Set transb, which determines whether or not we need to copy from B
705 // as if it needs a transposition. If a transposition is needed, then
706 // m and n have already been swapped. So in either case m
707 // represents the leading dimension of the copy.
710
711 b_copy = bl1_callocm( m, n );
712 ldb_copy = m;
713 incb_copy = 1;
714
716 m,
717 n,
718 b, incb, ldb,
720 }
721
722 // There are two cases where we need to perform the symm and then axpy
723 // the result into C with a transposition. We handle those cases here.
724 if ( symm_needs_axpyt )
725 {
726 // We need a temporary matrix for holding C^T. Notice that m and n
727 // represent the dimensions of C, and thus C_trans is n-by-m
728 // (interpreting both as column-major matrices). So the leading
729 // dimension of the temporary matrix holding C^T is n.
730 c_trans = bl1_callocm( n, m );
731 ldc_trans = n;
732 incc_trans = 1;
733
734 // Compute A * B (or B * A) and store the result in C_trans.
735 // Note that there is no overlap between the axpyt cases and
736 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
738 uplo,
739 n,
740 m,
741 alpha,
742 a, lda,
743 b, ldb,
744 &zero,
746
747 // Scale C by beta.
749 m,
750 n,
751 beta,
752 c, incc, ldc );
753
754 // And finally, accumulate the matrix product in C_trans into C
755 // with a transpose.
757 m,
758 n,
759 &one,
761 c, incc, ldc );
762
763 // Free the temporary matrix for C.
765 }
766 else // no extra axpyt step needed
767 {
769 uplo,
770 m,
771 n,
772 alpha,
773 a, lda,
775 beta,
776 c, ldc );
777 }
778
779 if ( symm_needs_copyb )
780 bl1_cfree( b_copy );
781
782 // Free any temporary contiguous matrices, copying the result back to
783 // the original matrix.
785 &a, &a_rs, &a_cs );
786
788 &b, &b_rs, &b_cs );
789
791 n_save,
793 &c, &c_rs, &c_cs );
794}
int i
Definition bl1_axmyv2.c:145
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:149
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:215
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition bl1_scalm.c:169
void bl1_csymm_blas(side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition bl1_symm.c:1137
int bl1_is_col_storage(int rs, int cs)
Definition bl1_is.c:90
int bl1_zero_dim2(int m, int n)
Definition bl1_is.c:118
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:45
scomplex bl1_c1(void)
Definition bl1_constants.c:61
void bl1_cfree(scomplex *p)
Definition bl1_free.c:40
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:81
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:40
scomplex bl1_c0(void)
Definition bl1_constants.c:125
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:77
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:59
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition bl1_set_dims.c:27
trans1_t
Definition blis_type_defs.h:53
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_csymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

◆ bl1_csymm_blas()

void bl1_csymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *alpha,
scomplex *a,
int  lda,
scomplex *b,
int  ldb,
scomplex *beta,
scomplex *c,
int  ldc 
)
1138{
1139#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1143
1146
1148 cblas_side,
1149 cblas_uplo,
1150 m,
1151 n,
1152 alpha,
1153 a, lda,
1154 b, ldb,
1155 beta,
1156 c, ldc );
1157#else
1158 char blas_side;
1159 char blas_uplo;
1160
1163
1165 &blas_uplo,
1166 &m,
1167 &n,
1168 alpha,
1169 a, &lda,
1170 b, &ldb,
1171 beta,
1172 c, &ldc );
1173#endif
1174}
void F77_csymm(char *side, char *uplo, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
CBLAS_ORDER
Definition blis_prototypes_cblas.h:17
@ CblasColMajor
Definition blis_prototypes_cblas.h:17
CBLAS_UPLO
Definition blis_prototypes_cblas.h:19
CBLAS_SIDE
Definition blis_prototypes_cblas.h:21
void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition bl1_param_map.c:71
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition bl1_param_map.c:47

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_csymm(), CblasColMajor, and F77_csymm().

Referenced by bl1_csymm().

◆ bl1_dsymm()

void bl1_dsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *alpha,
double *a,
int  a_rs,
int  a_cs,
double *b,
int  b_rs,
int  b_cs,
double *beta,
double *c,
int  c_rs,
int  c_cs 
)
275{
276 int m_save = m;
277 int n_save = n;
278 double* a_save = a;
279 double* b_save = b;
280 double* c_save = c;
281 int a_rs_save = a_rs;
282 int a_cs_save = a_cs;
283 int b_rs_save = b_rs;
284 int b_cs_save = b_cs;
285 int c_rs_save = c_rs;
286 int c_cs_save = c_cs;
287 double zero = bl1_d0();
288 double one = bl1_d1();
289 double* b_copy;
290 double* c_trans;
291 int dim_a;
292 int lda, inca;
293 int ldb, incb;
294 int ldc, incc;
295 int ldb_copy, incb_copy;
300
301 // Return early if possible.
302 if ( bl1_zero_dim2( m, n ) ) return;
303
304 // If necessary, allocate, initialize, and use a temporary contiguous
305 // copy of each matrix rather than the original matrices.
308 dim_a,
309 dim_a,
311 &a, &a_rs, &a_cs );
312
314 n,
316 &b, &b_rs, &b_cs );
317
319 n,
321 &c, &c_rs, &c_cs );
322
323 // Initialize with values assuming column-major storage.
324 lda = a_cs;
325 inca = a_rs;
326 ldb = b_cs;
327 incb = b_rs;
328 ldc = c_cs;
329 incc = c_rs;
330
331 // Adjust the parameters based on the storage of each matrix.
332 if ( bl1_is_col_storage( c_rs, c_cs ) )
333 {
334 if ( bl1_is_col_storage( a_rs, a_cs ) )
335 {
336 if ( bl1_is_col_storage( b_rs, b_cs ) )
337 {
338 // requested operation: C_c += uplo( A_c ) * B_c
339 // effective operation: C_c += uplo( A_c ) * B_c
340 }
341 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
342 {
343 // requested operation: C_c += uplo( A_c ) * B_r
344 // effective operation: C_c += uplo( A_c ) * B_c
346 }
347 }
348 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
349 {
350 if ( bl1_is_col_storage( b_rs, b_cs ) )
351 {
352 // requested operation: C_c += uplo( A_r ) * B_c
353 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
355
356 bl1_toggle_uplo( uplo );
357 }
358 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
359 {
360 // requested operation: C_c += uplo( A_r ) * B_r
361 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
364
366 bl1_toggle_uplo( uplo );
367
369 }
370 }
371 }
372 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
373 {
374 if ( bl1_is_col_storage( a_rs, a_cs ) )
375 {
376 if ( bl1_is_col_storage( b_rs, b_cs ) )
377 {
378 // requested operation: C_r += uplo( A_c ) * B_c
379 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
381
382 bl1_swap_ints( m, n );
383
385 }
386 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
387 {
388 // requested operation: C_r += uplo( A_c ) * B_r
389 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
392
393 bl1_swap_ints( m, n );
394
396 }
397 }
398 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
399 {
400 if ( bl1_is_col_storage( b_rs, b_cs ) )
401 {
402 // requested operation: C_r += uplo( A_r ) * B_c
403 // effective operation: C_c += B_c^T * ~uplo( A_c )
406
407 bl1_swap_ints( m, n );
408
410 bl1_toggle_uplo( uplo );
411
414 }
415 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
416 {
417 // requested operation: C_r += uplo( A_r ) * B_r
418 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
422
423 bl1_swap_ints( m, n );
424
425 bl1_toggle_uplo( uplo );
427 }
428 }
429 }
430
431 // We need a temporary matrix for the cases where B needs to be copied.
432 b_copy = b;
433 ldb_copy = ldb;
434 incb_copy = incb;
435
436 // There are two cases where we need to make a copy of B: one where the
437 // copy's dimensions are transposed from the original B, and one where
438 // the dimensions are not swapped.
439 if ( symm_needs_copyb )
440 {
442
443 // Set transb, which determines whether or not we need to copy from B
444 // as if it needs a transposition. If a transposition is needed, then
445 // m and n have already been swapped. So in either case m
446 // represents the leading dimension of the copy.
449
450 b_copy = bl1_dallocm( m, n );
451 ldb_copy = m;
452 incb_copy = 1;
453
455 m,
456 n,
457 b, incb, ldb,
459 }
460
461 // There are two cases where we need to perform the symm and then axpy
462 // the result into C with a transposition. We handle those cases here.
463 if ( symm_needs_axpyt )
464 {
465 // We need a temporary matrix for holding C^T. Notice that m and n
466 // represent the dimensions of C, and thus C_trans is n-by-m
467 // (interpreting both as column-major matrices). So the leading
468 // dimension of the temporary matrix holding C^T is n.
469 c_trans = bl1_dallocm( n, m );
470 ldc_trans = n;
471 incc_trans = 1;
472
473 // Compute A * B (or B * A) and store the result in C_trans.
474 // Note that there is no overlap between the axpyt cases and
475 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
477 uplo,
478 n,
479 m,
480 alpha,
481 a, lda,
482 b, ldb,
483 &zero,
485
486 // Scale C by beta.
488 m,
489 n,
490 beta,
491 c, incc, ldc );
492
493 // And finally, accumulate the matrix product in C_trans into C
494 // with a transpose.
496 m,
497 n,
498 &one,
500 c, incc, ldc );
501
502 // Free the temporary matrix for C.
504 }
505 else // no extra axpyt step needed
506 {
508 uplo,
509 m,
510 n,
511 alpha,
512 a, lda,
514 beta,
515 c, ldc );
516 }
517
518 if ( symm_needs_copyb )
519 bl1_dfree( b_copy );
520
521 // Free any temporary contiguous matrices, copying the result back to
522 // the original matrix.
524 &a, &a_rs, &a_cs );
525
527 &b, &b_rs, &b_cs );
528
530 n_save,
532 &c, &c_rs, &c_cs );
533}
void bl1_daxpymt(trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:81
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_copymt.c:148
void bl1_dscalm(conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
Definition bl1_scalm.c:65
void bl1_dsymm_blas(side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition bl1_symm.c:1098
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:29
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:45
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:47
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:36
double bl1_d0(void)
Definition bl1_constants.c:118
void bl1_dfree(double *p)
Definition bl1_free.c:35
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:35
double bl1_d1(void)
Definition bl1_constants.c:54

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dsymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().

◆ bl1_dsymm_blas()

void bl1_dsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *alpha,
double *a,
int  lda,
double *b,
int  ldb,
double *beta,
double *c,
int  ldc 
)
1099{
1100#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1104
1107
1109 cblas_side,
1110 cblas_uplo,
1111 m,
1112 n,
1113 *alpha,
1114 a, lda,
1115 b, ldb,
1116 *beta,
1117 c, ldc );
1118#else
1119 char blas_side;
1120 char blas_uplo;
1121
1124
1126 &blas_uplo,
1127 &m,
1128 &n,
1129 alpha,
1130 a, &lda,
1131 b, &ldb,
1132 beta,
1133 c, &ldc );
1134#endif
1135}
void F77_dsymm(char *side, char *uplo, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor, and F77_dsymm().

Referenced by bl1_dsymm().

◆ bl1_ssymm()

void bl1_ssymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *alpha,
float *a,
int  a_rs,
int  a_cs,
float *b,
int  b_rs,
int  b_cs,
float *beta,
float *c,
int  c_rs,
int  c_cs 
)
14{
15 int m_save = m;
16 int n_save = n;
17 float* a_save = a;
18 float* b_save = b;
19 float* c_save = c;
20 int a_rs_save = a_rs;
21 int a_cs_save = a_cs;
22 int b_rs_save = b_rs;
23 int b_cs_save = b_cs;
24 int c_rs_save = c_rs;
25 int c_cs_save = c_cs;
26 float zero = bl1_s0();
27 float one = bl1_s1();
28 float* b_copy;
29 float* c_trans;
30 int dim_a;
31 int lda, inca;
32 int ldb, incb;
33 int ldc, incc;
39
40 // Return early if possible.
41 if ( bl1_zero_dim2( m, n ) ) return;
42
43 // If necessary, allocate, initialize, and use a temporary contiguous
44 // copy of each matrix rather than the original matrices.
47 dim_a,
48 dim_a,
50 &a, &a_rs, &a_cs );
51
53 n,
55 &b, &b_rs, &b_cs );
56
58 n,
60 &c, &c_rs, &c_cs );
61
62 // Initialize with values assuming column-major storage.
63 lda = a_cs;
64 inca = a_rs;
65 ldb = b_cs;
66 incb = b_rs;
67 ldc = c_cs;
68 incc = c_rs;
69
70 // Adjust the parameters based on the storage of each matrix.
72 {
74 {
76 {
77 // requested operation: C_c += uplo( A_c ) * B_c
78 // effective operation: C_c += uplo( A_c ) * B_c
79 }
80 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
81 {
82 // requested operation: C_c += uplo( A_c ) * B_r
83 // effective operation: C_c += uplo( A_c ) * B_c
85 }
86 }
87 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
88 {
90 {
91 // requested operation: C_c += uplo( A_r ) * B_c
92 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
94
95 bl1_toggle_uplo( uplo );
96 }
97 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
98 {
99 // requested operation: C_c += uplo( A_r ) * B_r
100 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
103
105 bl1_toggle_uplo( uplo );
106
108 }
109 }
110 }
111 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
112 {
113 if ( bl1_is_col_storage( a_rs, a_cs ) )
114 {
115 if ( bl1_is_col_storage( b_rs, b_cs ) )
116 {
117 // requested operation: C_r += uplo( A_c ) * B_c
118 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
120
121 bl1_swap_ints( m, n );
122
124 }
125 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126 {
127 // requested operation: C_r += uplo( A_c ) * B_r
128 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
131
132 bl1_swap_ints( m, n );
133
135 }
136 }
137 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
138 {
139 if ( bl1_is_col_storage( b_rs, b_cs ) )
140 {
141 // requested operation: C_r += uplo( A_r ) * B_c
142 // effective operation: C_c += B_c^T * ~uplo( A_c )
145
146 bl1_swap_ints( m, n );
147
149 bl1_toggle_uplo( uplo );
150
153 }
154 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
155 {
156 // requested operation: C_r += uplo( A_r ) * B_r
157 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
161
162 bl1_swap_ints( m, n );
163
164 bl1_toggle_uplo( uplo );
166 }
167 }
168 }
169
170 // We need a temporary matrix for the cases where B needs to be copied.
171 b_copy = b;
172 ldb_copy = ldb;
173 incb_copy = incb;
174
175 // There are two cases where we need to make a copy of B: one where the
176 // copy's dimensions are transposed from the original B, and one where
177 // the dimensions are not swapped.
178 if ( symm_needs_copyb )
179 {
181
182 // Set transb, which determines whether or not we need to copy from B
183 // as if it needs a transposition. If a transposition is needed, then
184 // m and n have already been swapped. So in either case m
185 // represents the leading dimension of the copy.
188
189 b_copy = bl1_sallocm( m, n );
190 ldb_copy = m;
191 incb_copy = 1;
192
194 m,
195 n,
196 b, incb, ldb,
198 }
199
200 // There are two cases where we need to perform the symm and then axpy
201 // the result into C with a transposition. We handle those cases here.
202 if ( symm_needs_axpyt )
203 {
204 // We need a temporary matrix for holding C^T. Notice that m and n
205 // represent the dimensions of C, and thus C_trans is n-by-m
206 // (interpreting both as column-major matrices). So the leading
207 // dimension of the temporary matrix holding C^T is n.
208 c_trans = bl1_sallocm( n, m );
209 ldc_trans = n;
210 incc_trans = 1;
211
212 // Compute A * B (or B * A) and store the result in C_trans.
213 // Note that there is no overlap between the axpyt cases and
214 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
216 uplo,
217 n,
218 m,
219 alpha,
220 a, lda,
221 b, ldb,
222 &zero,
224
225 // Scale C by beta.
227 m,
228 n,
229 beta,
230 c, incc, ldc );
231
232 // And finally, accumulate the matrix product in C_trans into C
233 // with a transpose.
235 m,
236 n,
237 &one,
239 c, incc, ldc );
240
241 // Free the temporary matrix for C.
243 }
244 else // no extra axpyt step needed
245 {
247 uplo,
248 m,
249 n,
250 alpha,
251 a, lda,
253 beta,
254 c, ldc );
255 }
256
257 if ( symm_needs_copyb )
258 bl1_sfree( b_copy );
259
260 // Free any temporary contiguous matrices, copying the result back to
261 // the original matrix.
263 &a, &a_rs, &a_cs );
264
266 &b, &b_rs, &b_cs );
267
269 n_save,
271 &c, &c_rs, &c_cs );
272}
void bl1_saxpymt(trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:13
void bl1_scopymt(trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition bl1_copymt.c:81
void bl1_sscalm(conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
Definition bl1_scalm.c:13
void bl1_ssymm_blas(side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition bl1_symm.c:1059
float bl1_s0(void)
Definition bl1_constants.c:111
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:13
void bl1_sfree(float *p)
Definition bl1_free.c:30
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:30
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:13
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:13
float bl1_s1(void)
Definition bl1_constants.c:47
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:13

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_ssymm_blas(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_shemm(), FLA_Hemm_external(), and FLA_Symm_external().

◆ bl1_ssymm_blas()

void bl1_ssymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *alpha,
float *a,
int  lda,
float *b,
int  ldb,
float *beta,
float *c,
int  ldc 
)
1060{
1061#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1065
1068
1070 cblas_side,
1071 cblas_uplo,
1072 m,
1073 n,
1074 *alpha,
1075 a, lda,
1076 b, ldb,
1077 *beta,
1078 c, ldc );
1079#else
1080 char blas_side;
1081 char blas_uplo;
1082
1085
1087 &blas_uplo,
1088 &m,
1089 &n,
1090 alpha,
1091 a, &lda,
1092 b, &ldb,
1093 beta,
1094 c, &ldc );
1095#endif
1096}
void F77_ssymm(char *side, char *uplo, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)
void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor, and F77_ssymm().

Referenced by bl1_ssymm().

◆ bl1_zsymm()

void bl1_zsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs,
dcomplex *beta,
dcomplex *c,
int  c_rs,
int  c_cs 
)
797{
798 int m_save = m;
799 int n_save = n;
800 dcomplex* a_save = a;
801 dcomplex* b_save = b;
802 dcomplex* c_save = c;
803 int a_rs_save = a_rs;
804 int a_cs_save = a_cs;
805 int b_rs_save = b_rs;
806 int b_cs_save = b_cs;
807 int c_rs_save = c_rs;
808 int c_cs_save = c_cs;
809 dcomplex zero = bl1_z0();
810 dcomplex one = bl1_z1();
813 int dim_a;
814 int lda, inca;
815 int ldb, incb;
816 int ldc, incc;
817 int ldb_copy, incb_copy;
822
823 // Return early if possible.
824 if ( bl1_zero_dim2( m, n ) ) return;
825
826 // If necessary, allocate, initialize, and use a temporary contiguous
827 // copy of each matrix rather than the original matrices.
830 dim_a,
831 dim_a,
833 &a, &a_rs, &a_cs );
834
836 n,
838 &b, &b_rs, &b_cs );
839
841 n,
843 &c, &c_rs, &c_cs );
844
845 // Initialize with values assuming column-major storage.
846 lda = a_cs;
847 inca = a_rs;
848 ldb = b_cs;
849 incb = b_rs;
850 ldc = c_cs;
851 incc = c_rs;
852
853 // Adjust the parameters based on the storage of each matrix.
854 if ( bl1_is_col_storage( c_rs, c_cs ) )
855 {
856 if ( bl1_is_col_storage( a_rs, a_cs ) )
857 {
858 if ( bl1_is_col_storage( b_rs, b_cs ) )
859 {
860 // requested operation: C_c += uplo( A_c ) * B_c
861 // effective operation: C_c += uplo( A_c ) * B_c
862 }
863 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
864 {
865 // requested operation: C_c += uplo( A_c ) * B_r
866 // effective operation: C_c += uplo( A_c ) * B_c
868 }
869 }
870 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
871 {
872 if ( bl1_is_col_storage( b_rs, b_cs ) )
873 {
874 // requested operation: C_c += uplo( A_r ) * B_c
875 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
877
878 bl1_toggle_uplo( uplo );
879 }
880 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
881 {
882 // requested operation: C_c += uplo( A_r ) * B_r
883 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
886
888 bl1_toggle_uplo( uplo );
889
891 }
892 }
893 }
894 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
895 {
896 if ( bl1_is_col_storage( a_rs, a_cs ) )
897 {
898 if ( bl1_is_col_storage( b_rs, b_cs ) )
899 {
900 // requested operation: C_r += uplo( A_c ) * B_c
901 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
903
904 bl1_swap_ints( m, n );
905
907 }
908 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
909 {
910 // requested operation: C_r += uplo( A_c ) * B_r
911 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
914
915 bl1_swap_ints( m, n );
916
918 }
919 }
920 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
921 {
922 if ( bl1_is_col_storage( b_rs, b_cs ) )
923 {
924 // requested operation: C_r += uplo( A_r ) * B_c
925 // effective operation: C_c += B_c^T * ~uplo( A_c )
928
929 bl1_swap_ints( m, n );
930
932 bl1_toggle_uplo( uplo );
933
936 }
937 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
938 {
939 // requested operation: C_r += uplo( A_r ) * B_r
940 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
944
945 bl1_swap_ints( m, n );
946
947 bl1_toggle_uplo( uplo );
949 }
950 }
951 }
952
953 // We need a temporary matrix for the cases where B needs to be copied.
954 b_copy = b;
955 ldb_copy = ldb;
956 incb_copy = incb;
957
958 // There are two cases where we need to make a copy of B: one where the
959 // copy's dimensions are transposed from the original B, and one where
960 // the dimensions are not swapped.
961 if ( symm_needs_copyb )
962 {
964
965 // Set transb, which determines whether or not we need to copy from B
966 // as if it needs a transposition. If a transposition is needed, then
967 // m and n have already been swapped. So in either case m
968 // represents the leading dimension of the copy.
971
972 b_copy = bl1_zallocm( m, n );
973 ldb_copy = m;
974 incb_copy = 1;
975
977 m,
978 n,
979 b, incb, ldb,
981 }
982
983 // There are two cases where we need to perform the symm and then axpy
984 // the result into C with a transposition. We handle those cases here.
985 if ( symm_needs_axpyt )
986 {
987 // We need a temporary matrix for holding C^T. Notice that m and n
988 // represent the dimensions of C, and thus C_trans is n-by-m
989 // (interpreting both as column-major matrices). So the leading
990 // dimension of the temporary matrix holding C^T is n.
991 c_trans = bl1_zallocm( n, m );
992 ldc_trans = n;
993 incc_trans = 1;
994
995 // Compute A * B (or B * A) and store the result in C_trans.
996 // Note that there is no overlap between the axpyt cases and
997 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
999 uplo,
1000 n,
1001 m,
1002 alpha,
1003 a, lda,
1004 b, ldb,
1005 &zero,
1006 c_trans, ldc_trans );
1007
1008 // Scale C by beta.
1010 m,
1011 n,
1012 beta,
1013 c, incc, ldc );
1014
1015 // And finally, accumulate the matrix product in C_trans into C
1016 // with a transpose.
1018 m,
1019 n,
1020 &one,
1022 c, incc, ldc );
1023
1024 // Free the temporary matrix for C.
1025 bl1_zfree( c_trans );
1026 }
1027 else // no extra axpyt step needed
1028 {
1030 uplo,
1031 m,
1032 n,
1033 alpha,
1034 a, lda,
1036 beta,
1037 c, ldc );
1038 }
1039
1040 if ( symm_needs_copyb )
1041 bl1_zfree( b_copy );
1042
1043 // Free any temporary contiguous matrices, copying the result back to
1044 // the original matrix.
1046 &a, &a_rs, &a_cs );
1047
1049 &b, &b_rs, &b_cs );
1050
1052 n_save,
1054 &c, &c_rs, &c_cs );
1055}
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:248
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:286
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition bl1_scalm.c:273
void bl1_zsymm_blas(side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition bl1_symm.c:1176
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:115
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:109
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:45
void bl1_zfree(dcomplex *p)
Definition bl1_free.c:45
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:61
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:82
Definition blis_type_defs.h:138

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_zsymm_blas(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

◆ bl1_zsymm_blas()

void bl1_zsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  lda,
dcomplex *b,
int  ldb,
dcomplex *beta,
dcomplex *c,
int  ldc 
)
1177{
1178#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1182
1185
1187 cblas_side,
1188 cblas_uplo,
1189 m,
1190 n,
1191 alpha,
1192 a, lda,
1193 b, ldb,
1194 beta,
1195 c, ldc );
1196#else
1197 char blas_side;
1198 char blas_uplo;
1199
1202
1204 &blas_uplo,
1205 &m,
1206 &n,
1207 alpha,
1208 a, &lda,
1209 b, &ldb,
1210 beta,
1211 c, &ldc );
1212#endif
1213}
void F77_zsymm(char *side, char *uplo, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().

Referenced by bl1_zsymm().