libflame revision_anchor
Functions
blis_prototypes_level3.h File Reference

(r)

Go to the source code of this file.

Functions

void bl1_sgemm (trans1_t transa, trans1_t transb, int m, int k, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dgemm (trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cgemm (trans1_t transa, trans1_t transb, int m, int k, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zgemm (trans1_t transa, trans1_t transb, int m, int k, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_sgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_cgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_shemm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dhemm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_chemm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zhemm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_chemm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zhemm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_sherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_cherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
 
void bl1_zherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
 
void bl1_sher2k (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dher2k (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cher2k (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, float *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zher2k (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_cher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc)
 
void bl1_zher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc)
 
void bl1_ssymm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsymm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csymm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsymm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssymm_blas (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dsymm_blas (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_csymm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsymm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_ssyrk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsyrk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csyrk (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsyrk (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *beta, float *c, int ldc)
 
void bl1_dsyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *beta, double *c, int ldc)
 
void bl1_csyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_ssyr2k (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dsyr2k (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_csyr2k (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zsyr2k (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_ssyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
 
void bl1_dsyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
 
void bl1_csyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
 
void bl1_zsyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
 
void bl1_strmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dtrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ctrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_ztrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_strmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
 
void bl1_dtrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
 
void bl1_ctrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
 
void bl1_ztrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
 
void bl1_strsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dtrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ctrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_ztrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_strsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
 
void bl1_dtrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
 
void bl1_ctrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
 
void bl1_ztrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
 
void bl1_strmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dtrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_ctrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_ztrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_strsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dtrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_ctrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_ztrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
 

Function Documentation

◆ bl1_cgemm()

void bl1_cgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
536{
537 int m_save = m;
538 int n_save = n;
539 scomplex* a_save = a;
540 scomplex* b_save = b;
541 scomplex* c_save = c;
542 int a_rs_save = a_rs;
543 int a_cs_save = a_cs;
544 int b_rs_save = b_rs;
545 int b_cs_save = b_cs;
546 int c_rs_save = c_rs;
547 int c_cs_save = c_cs;
548 scomplex zero = bl1_c0();
549 scomplex one = bl1_c1();
555 int lda, inca;
556 int ldb, incb;
557 int ldc, incc;
558 int lda_conj, inca_conj;
559 int ldb_conj, incb_conj;
561 int m_gemm, n_gemm;
563 int a_was_copied;
564 int b_was_copied;
565
566 // Return early if possible.
567 if ( bl1_zero_dim3( m, k, n ) )
568 {
570 m,
571 n,
572 beta,
573 c, c_rs, c_cs );
574 return;
575 }
576
577 // If necessary, allocate, initialize, and use a temporary contiguous
578 // copy of each matrix rather than the original matrices.
580 m,
581 k,
583 &a, &a_rs, &a_cs );
584
586 k,
587 n,
589 &b, &b_rs, &b_cs );
590
592 n,
594 &c, &c_rs, &c_cs );
595
596 // Figure out whether A and/or B was copied to contiguous memory. This
597 // is used later to prevent redundant copying.
598 a_was_copied = ( a != a_save );
599 b_was_copied = ( b != b_save );
600
601 // These are used to track the original values of a and b prior to any
602 // operand swapping that might take place. This is necessary for proper
603 // freeing of memory when one is a temporary contiguous matrix.
604 a_unswap = a;
605 b_unswap = b;
606
607 // These are used to track the dimensions of the product of the
608 // A and B operands to the BLAS invocation of gemm. These differ
609 // from m and n when the operands need to be swapped.
610 m_gemm = m;
611 n_gemm = n;
612
613 // Initialize with values assuming column-major storage.
614 lda = a_cs;
615 inca = a_rs;
616 ldb = b_cs;
617 incb = b_rs;
618 ldc = c_cs;
619 incc = c_rs;
620
621 // Adjust the parameters based on the storage of each matrix.
622 if ( bl1_is_col_storage( c_rs, c_cs ) )
623 {
624 if ( bl1_is_col_storage( a_rs, a_cs ) )
625 {
626 if ( bl1_is_col_storage( b_rs, b_cs ) )
627 {
628 // requested operation: C_c += tr( A_c ) * tr( B_c )
629 // effective operation: C_c += tr( A_c ) * tr( B_c )
630 }
631 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
632 {
633
634 // requested operation: C_c += tr( A_c ) * tr( B_r )
635 // effective operation: C_c += tr( A_c ) * tr( B_c )^T
637
639 }
640 }
641 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
642 {
643 if ( bl1_is_col_storage( b_rs, b_cs ) )
644 {
645 // requested operation: C_c += tr( A_r ) * tr( B_c )
646 // effective operation: C_c += tr( A_r )^T * tr( B_c )
648
650 }
651 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
652 {
653 // requested operation: C_c += tr( A_r ) * tr( B_r )
654 // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
657
663
666 }
667 }
668 }
669 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
670 {
671 if ( bl1_is_col_storage( a_rs, a_cs ) )
672 {
673 if ( bl1_is_col_storage( b_rs, b_cs ) )
674 {
675 // requested operation: C_r += tr( A_c ) * tr( B_c )
676 // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
678
679 bl1_swap_ints( m, n );
680
682 }
683 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
684 {
685 // requested operation: C_r += tr( A_c ) * tr( B_r )
686 // effective operation: C_c += tr( B_c ) * tr( A_c )^T
689
691
692 bl1_swap_ints( m, n );
699 }
700 }
701 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
702 {
703 if ( bl1_is_col_storage( b_rs, b_cs ) )
704 {
705 // requested operation: C_r += tr( A_r ) * tr( B_c )
706 // effective operation: C_c += tr( B_c )^T * tr( A_c )
709
711
712 bl1_swap_ints( m, n );
719 }
720 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
721 {
722 // requested operation: C_r += tr( A_r ) * tr( B_r )
723 // effective operation: C_c += tr( B_c ) * tr( A_c )
727
728 bl1_swap_ints( m, n );
735 }
736 }
737 }
738
739 // We need a temporary matrix for the case where A is conjugated.
740 a_conj = a;
741 lda_conj = lda;
742 inca_conj = inca;
743
744 // If transa indicates conjugate-no-transpose and A was not already
745 // copied, then copy and conjugate it to a temporary matrix. Otherwise,
746 // if transa indicates conjugate-no-transpose and A was already copied,
747 // just conjugate it.
749 {
752 inca_conj = 1;
753
755 m_gemm,
756 k,
757 a, inca, lda,
759 }
760 else if ( bl1_is_conjnotrans( transa ) && a_was_copied )
761 {
763 k,
765 }
766
767 // We need a temporary matrix for the case where B is conjugated.
768 b_conj = b;
769 ldb_conj = ldb;
770 incb_conj = incb;
771
772 // If transb indicates conjugate-no-transpose and B was not already
773 // copied, then copy and conjugate it to a temporary matrix. Otherwise,
774 // if transb indicates conjugate-no-transpose and B was already copied,
775 // just conjugate it.
777 {
779 ldb_conj = k;
780 incb_conj = 1;
781
783 k,
784 n_gemm,
785 b, incb, ldb,
787 }
788 else if ( bl1_is_conjnotrans( transb ) && b_was_copied )
789 {
790 bl1_cconjm( k,
791 n_gemm,
793 }
794
795 // There are two cases where we need to perform the gemm and then axpy
796 // the result into C with a transposition. We handle those cases here.
797 if ( gemm_needs_axpyt )
798 {
799 // We need a temporary matrix for holding C^T. Notice that m and n
800 // represent the dimensions of C, while m_gemm and n_gemm are the
801 // dimensions of the actual product op(A)*op(B), which may be n-by-m
802 // since the operands may have been swapped.
805 incc_trans = 1;
806
807 // Compute tr( A ) * tr( B ), where A and B may have been swapped
808 // to reference the other, and store the result in C_trans.
810 transb,
811 m_gemm,
812 n_gemm,
813 k,
814 alpha,
817 &zero,
819
820 // Scale C by beta.
822 m,
823 n,
824 beta,
825 c, incc, ldc );
826
827 // And finally, accumulate the matrix product in C_trans into C
828 // with a transpose.
830 m,
831 n,
832 &one,
834 c, incc, ldc );
835
836 // Free the temporary matrix for C.
838 }
839 else // no extra axpyt step needed
840 {
842 transb,
843 m_gemm,
844 n_gemm,
845 k,
846 alpha,
849 beta,
850 c, ldc );
851 }
852
854 bl1_cfree( a_conj );
855
857 bl1_cfree( b_conj );
858
859 // Free any temporary contiguous matrices, copying the result back to
860 // the original matrix.
862 &a_unswap, &a_rs, &a_cs );
863
865 &b_unswap, &b_rs, &b_cs );
866
868 n_save,
870 &c, &c_rs, &c_cs );
871}
int i
Definition bl1_axmyv2.c:145
void bl1_caxpymt(trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:149
void bl1_cconjm(int m, int n, scomplex *a, int a_rs, int a_cs)
Definition bl1_conjm.c:23
void bl1_ccopymt(trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:215
void bl1_cgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition bl1_gemm.c:1295
void bl1_cscalm(conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
Definition bl1_scalm.c:169
int bl1_is_col_storage(int rs, int cs)
Definition bl1_is.c:90
int bl1_zero_dim3(int m, int k, int n)
Definition bl1_is.c:123
int bl1_is_conjnotrans(trans1_t trans)
Definition bl1_is.c:25
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:45
scomplex bl1_c1(void)
Definition bl1_constants.c:61
void bl1_cfree(scomplex *p)
Definition bl1_free.c:40
void bl1_ccreate_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:81
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:40
scomplex bl1_c0(void)
Definition bl1_constants.c:125
void bl1_cfree_saved_contigm(int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:59
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmt.c:89
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_cconjm(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cgemm_blas(), bl1_cscalm(), bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_zero_dim3(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

◆ bl1_cgemm_blas()

void bl1_cgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)
1296{
1297#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1301
1304
1308 m,
1309 n,
1310 k,
1311 alpha,
1312 a, lda,
1313 b, ldb,
1314 beta,
1315 c, ldc );
1316#else
1317 char blas_transa;
1318 char blas_transb;
1319
1322
1324 &blas_transb,
1325 &m,
1326 &n,
1327 &k,
1328 alpha,
1329 a, &lda,
1330 b, &ldb,
1331 beta,
1332 c, &ldc );
1333#endif
1334}
void F77_cgemm(char *transa, char *transb, int *m, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
CBLAS_ORDER
Definition blis_prototypes_cblas.h:17
@ CblasColMajor
Definition blis_prototypes_cblas.h:17
void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
CBLAS_TRANSPOSE
Definition blis_prototypes_cblas.h:18
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition bl1_param_map.c:15

References bl1_param_map_to_netlib_trans(), cblas_cgemm(), CblasColMajor, and F77_cgemm().

Referenced by bl1_cgemm().

◆ bl1_chemm()

void bl1_chemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
39{
40 int m_save = m;
41 int n_save = n;
42 scomplex* a_save = a;
43 scomplex* b_save = b;
44 scomplex* c_save = c;
45 int a_rs_save = a_rs;
46 int a_cs_save = a_cs;
47 int b_rs_save = b_rs;
48 int b_cs_save = b_cs;
49 int c_rs_save = c_rs;
50 int c_cs_save = c_cs;
56 int dim_a;
57 int lda, inca;
58 int ldb, incb;
59 int ldc, incc;
67 int a_was_copied;
68
69 // Return early if possible.
70 if ( bl1_zero_dim2( m, n ) ) return;
71
72 // If necessary, allocate, initialize, and use a temporary contiguous
73 // copy of each matrix rather than the original matrices.
76 dim_a,
77 dim_a,
79 &a, &a_rs, &a_cs );
80
82 n,
84 &b, &b_rs, &b_cs );
85
87 n,
89 &c, &c_rs, &c_cs );
90
91 // Figure out whether A was copied to contiguous memory. This is used to
92 // prevent redundant copying.
93 a_was_copied = ( a != a_save );
94
95 // Initialize with values assuming column-major storage.
96 lda = a_cs;
97 inca = a_rs;
98 ldb = b_cs;
99 incb = b_rs;
100 ldc = c_cs;
101 incc = c_rs;
102
103 // Adjust the parameters based on the storage of each matrix.
104 if ( bl1_is_col_storage( c_rs, c_cs ) )
105 {
106 if ( bl1_is_col_storage( a_rs, a_cs ) )
107 {
108 if ( bl1_is_col_storage( b_rs, b_cs ) )
109 {
110 // requested operation: C_c += uplo( A_c ) * B_c
111 // effective operation: C_c += uplo( A_c ) * B_c
112 }
113 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
114 {
115 // requested operation: C_c += uplo( A_c ) * B_r
116 // effective operation: C_c += uplo( A_c ) * B_c
118 }
119 }
120 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
121 {
122 if ( bl1_is_col_storage( b_rs, b_cs ) )
123 {
124 // requested operation: C_c += uplo( A_r ) * B_c
125 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
127
128 bl1_toggle_uplo( uplo );
129
131 }
132 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
133 {
134 // requested operation: C_c += uplo( A_r ) * B_r
135 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
138
140 bl1_toggle_uplo( uplo );
141
143 }
144 }
145 }
146 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
147 {
148 if ( bl1_is_col_storage( a_rs, a_cs ) )
149 {
150 if ( bl1_is_col_storage( b_rs, b_cs ) )
151 {
152 // requested operation: C_r += uplo( A_c ) * B_c
153 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
155
156 bl1_swap_ints( m, n );
157
159 }
160 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
161 {
162 // requested operation: C_r += uplo( A_c ) * B_r
163 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
166
167 bl1_swap_ints( m, n );
168
170
172 }
173 }
174 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
175 {
176 if ( bl1_is_col_storage( b_rs, b_cs ) )
177 {
178 // requested operation: C_r += uplo( A_r ) * B_c
179 // effective operation: C_c += B_c^T * ~uplo( A_c )
182
183 bl1_swap_ints( m, n );
184
186 bl1_toggle_uplo( uplo );
187
190 }
191 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
192 {
193 // requested operation: C_r += uplo( A_r ) * B_r
194 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
198
199 bl1_swap_ints( m, n );
200
201 bl1_toggle_uplo( uplo );
203 }
204 }
205 }
206
207 // We need a temporary matrix for the cases where A is conjugated.
208 a_conj = a;
209 lda_conj = lda;
210 inca_conj = inca;
211
213 {
214 int dim_a;
215
217
219 lda_conj = dim_a;
220 inca_conj = 1;
221
222 bl1_ccopymrt( uplo,
224 dim_a,
225 dim_a,
226 a, inca, lda,
228 }
229 else if ( hemm_needs_conja && a_was_copied )
230 {
231 int dim_a;
232
234
235 bl1_cconjmr( uplo,
236 dim_a,
237 dim_a,
239 }
240
241 // We need a temporary matrix for the cases where B needs to be copied.
242 b_copy = b;
243 ldb_copy = ldb;
244 incb_copy = incb;
245
246 // There are two cases where we need to make a copy of B: one where the
247 // copy's dimensions are transposed from the original B, and one where
248 // the dimensions are not swapped.
249 if ( hemm_needs_copyb )
250 {
252
253 // Set transb, which determines whether or not we need to copy from B
254 // as if it needs a transposition. If a transposition is needed, then
255 // m and n have already been swapped. So in either case m
256 // represents the leading dimension of the copy.
259
260 b_copy = bl1_callocm( m, n );
261 ldb_copy = m;
262 incb_copy = 1;
263
265 m,
266 n,
267 b, incb, ldb,
269 }
270
271 // There are two cases where we need to perform the hemm and then axpy
272 // the result into C with a transposition. We handle those cases here.
273 if ( hemm_needs_axpyt )
274 {
275 // We need a temporary matrix for holding C^T. Notice that m and n
276 // represent the dimensions of C, and thus C_trans is n-by-m
277 // (interpreting both as column-major matrices). So the leading
278 // dimension of the temporary matrix holding C^T is n.
279 c_trans = bl1_callocm( n, m );
280 ldc_trans = n;
281 incc_trans = 1;
282
283 // Compute A * B (or B * A) and store the result in C_trans.
284 // Note that there is no overlap between the axpyt cases and
285 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
287 uplo,
288 n,
289 m,
290 alpha,
291 a, lda,
292 b, ldb,
293 &zero,
295
296 // Scale C by beta.
298 m,
299 n,
300 beta,
301 c, incc, ldc );
302
303 // And finally, accumulate the matrix product in C_trans into C
304 // with a transpose.
306 m,
307 n,
308 &one,
310 c, incc, ldc );
311
312 // Free the temporary matrix for C.
314 }
315 else // no extra axpyt step needed
316 {
318 uplo,
319 m,
320 n,
321 alpha,
324 beta,
325 c, ldc );
326 }
327
329 bl1_cfree( a_conj );
330
331 if ( hemm_needs_copyb )
332 bl1_cfree( b_copy );
333
334 // Free any temporary contiguous matrices, copying the result back to
335 // the original matrix.
337 &a, &a_rs, &a_cs );
338
340 &b, &b_rs, &b_cs );
341
343 n_save,
345 &c, &c_rs, &c_cs );
346}
void bl1_cconjmr(uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
Definition bl1_conjmr.c:23
void bl1_ccopymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_copymrt.c:223
void bl1_chemm_blas(side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition bl1_hemm.c:660
int bl1_zero_dim2(int m, int n)
Definition bl1_is.c:118
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:77
void bl1_set_dim_with_side(side1_t side, int m, int n, int *dim_new)
Definition bl1_set_dims.c:27
trans1_t
Definition blis_type_defs.h:53
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_cconjmr(), bl1_ccopymrt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_chemm_blas(), bl1_cscalm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Hemm_external().

◆ bl1_chemm_blas()

void bl1_chemm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)
661{
662#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
666
669
673 m,
674 n,
675 alpha,
676 a, lda,
677 b, ldb,
678 beta,
679 c, ldc );
680#else
681 char blas_side;
682 char blas_uplo;
683
686
688 &blas_uplo,
689 &m,
690 &n,
691 alpha,
692 a, &lda,
693 b, &ldb,
694 beta,
695 c, &ldc );
696#endif
697}
void F77_chemm(char *side, char *uplo, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
CBLAS_UPLO
Definition blis_prototypes_cblas.h:19
CBLAS_SIDE
Definition blis_prototypes_cblas.h:21
void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
void bl1_param_map_to_netlib_side(side1_t blis_side, void *blas_side)
Definition bl1_param_map.c:71
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition bl1_param_map.c:47

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_chemm(), CblasColMajor, and F77_chemm().

Referenced by bl1_chemm().

◆ bl1_cher2k()

void bl1_cher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
40{
41 uplo1_t uplo_save = uplo;
42 int m_save = m;
43 scomplex* a_save = a;
44 scomplex* b_save = b;
45 scomplex* c_save = c;
46 int a_rs_save = a_rs;
47 int a_cs_save = a_cs;
48 int b_rs_save = b_rs;
49 int b_cs_save = b_cs;
50 int c_rs_save = c_rs;
51 int c_cs_save = c_cs;
52 float zero_r = bl1_s0();
58 int lda, inca;
59 int ldb, incb;
60 int ldc, incc;
68
69 // Return early if possible.
70 if ( bl1_zero_dim2( m, k ) ) return;
71
72 // If necessary, allocate, initialize, and use a temporary contiguous
73 // copy of each matrix rather than the original matrices.
75 m,
76 k,
78 &a, &a_rs, &a_cs );
79
81 m,
82 k,
84 &b, &b_rs, &b_cs );
85
87 m,
88 m,
90 &c, &c_rs, &c_cs );
91
92 // Initialize with values assuming column-major storage.
93 lda = a_cs;
94 inca = a_rs;
95 ldb = b_cs;
96 incb = b_rs;
97 ldc = c_cs;
98 incc = c_rs;
99
100 // Adjust the parameters based on the storage of each matrix.
101 if ( bl1_is_col_storage( c_rs, c_cs ) )
102 {
103 if ( bl1_is_col_storage( a_rs, a_cs ) )
104 {
105 if ( bl1_is_col_storage( b_rs, b_cs ) )
106 {
107 // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
108 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
109 }
110 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
111 {
112 // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
113 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
115 }
116 }
117 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
118 {
119 if ( bl1_is_col_storage( b_rs, b_cs ) )
120 {
121 // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
122 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
124 }
125 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126 {
127 // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
128 // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
131
133
136 }
137 }
138 }
139 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
140 {
141 if ( bl1_is_col_storage( a_rs, a_cs ) )
142 {
143 if ( bl1_is_col_storage( b_rs, b_cs ) )
144 {
145 // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
146 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
148
149 bl1_toggle_uplo( uplo );
150
152 }
153 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
154 {
155 // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
156 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
158
160
161 bl1_toggle_uplo( uplo );
162
164 }
165 }
166 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
167 {
168 if ( bl1_is_col_storage( b_rs, b_cs ) )
169 {
170 // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
171 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
173
175
176 bl1_toggle_uplo( uplo );
177
179 }
180 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
181 {
182 // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
183 // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
187
188 bl1_toggle_uplo( uplo );
190
192 }
193 }
194 }
195
196 // Make a copy of alpha and conjugate if necessary.
197 alpha_copy = *alpha;
199 {
201 }
202
203 a_copy = a;
204 lda_copy = lda;
205 inca_copy = inca;
206
207 // There are two cases where we need to copy A to column-major storage.
208 // We handle those two cases here.
209 if ( her2k_needs_copya )
210 {
211 int m_a;
212 int n_a;
213
214 // Determine the dimensions of A according to the value of trans. We
215 // need this in order to set the leading dimension of the copy of A.
217
218 // We need a temporary matrix to hold a column-major copy of A.
219 a_copy = bl1_callocm( m, k );
220 lda_copy = m_a;
221 inca_copy = 1;
222
223 // Copy the contents of A into A_copy.
225 m_a,
226 n_a,
227 a, inca, lda,
229 }
230
231 b_copy = b;
232 ldb_copy = ldb;
233 incb_copy = incb;
234
235 // There are two cases where we need to copy B to column-major storage.
236 // We handle those two cases here.
237 if ( her2k_needs_copyb )
238 {
239 int m_b;
240 int n_b;
241
242 // Determine the dimensions of B according to the value of trans. We
243 // need this in order to set the leading dimension of the copy of B.
245
246 // We need a temporary matrix to hold a column-major copy of B.
247 b_copy = bl1_callocm( m, k );
248 ldb_copy = m_b;
249 incb_copy = 1;
250
251 // Copy the contents of B into B_copy.
253 m_b,
254 n_b,
255 b, incb, ldb,
257 }
258
259 // There are two cases where we need to perform the rank-2k product and
260 // then axpy the result into C with a conjugation. We handle those two
261 // cases here.
262 if ( her2k_needs_conj )
263 {
264 // We need a temporary matrix for holding the rank-2k product.
265 c_conj = bl1_callocm( m, m );
266 ldc_conj = m;
267 incc_conj = 1;
268
269 // Compute the rank-2k product.
270 bl1_cher2k_blas( uplo,
271 trans,
272 m,
273 k,
274 &alpha_copy,
277 &zero_r,
278 c_conj, ldc_conj );
279
280 // Scale C by beta.
281 bl1_csscalmr( uplo,
282 m,
283 m,
284 beta,
285 c, incc, ldc );
286
287 // And finally, accumulate the rank-2k product in C_conj into C
288 // with a conjugation.
289 bl1_caxpymrt( uplo,
291 m,
292 m,
293 &one,
295 c, incc, ldc );
296
297 // Free the temporary matrix for C.
298 bl1_cfree( c_conj );
299 }
300 else
301 {
302 bl1_cher2k_blas( uplo,
303 trans,
304 m,
305 k,
306 &alpha_copy,
309 beta,
310 c, ldc );
311 }
312
313 if ( her2k_needs_copya )
314 bl1_cfree( a_copy );
315
316 if ( her2k_needs_copyb )
317 bl1_cfree( b_copy );
318
319 // Free any temporary contiguous matrices, copying the result back to
320 // the original matrix.
322 &a, &a_rs, &a_cs );
323
325 &b, &b_rs, &b_cs );
326
328 m_save,
329 m_save,
331 &c, &c_rs, &c_cs );
332}
void bl1_caxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_axpymrt.c:227
void bl1_cher2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc)
Definition bl1_her2k.c:631
void bl1_csscalmr(uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
Definition bl1_scalmr.c:125
float bl1_s0(void)
Definition bl1_constants.c:111
void bl1_set_dims_with_trans(trans1_t trans, int m, int n, int *m_new, int *n_new)
Definition bl1_set_dims.c:13
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigmr.c:59
uplo1_t
Definition blis_type_defs.h:61

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccopymt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cher2k_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_set_dims_with_trans(), bl1_zero_dim2(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

◆ bl1_cher2k_blas()

void bl1_cher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
float *  beta,
scomplex *  c,
int  ldc 
)
632{
633#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
637
640
644 m,
645 k,
646 alpha,
647 a, lda,
648 b, ldb,
649 *beta,
650 c, ldc );
651#else
652 char blas_uplo;
653 char blas_trans;
654
657
659 &blas_trans,
660 &m,
661 &k,
662 alpha,
663 a, &lda,
664 b, &ldb,
665 beta,
666 c, &ldc );
667#endif
668}
void F77_cher2k(char *uplo, char *transa, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, float *beta, scomplex *c, int *ldc)
void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cher2k(), CblasColMajor, and F77_cher2k().

Referenced by bl1_cher2k().

◆ bl1_cherk()

void bl1_cherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
37{
38 uplo1_t uplo_save = uplo;
39 int m_save = m;
40 scomplex* a_save = a;
41 scomplex* c_save = c;
42 int a_rs_save = a_rs;
43 int a_cs_save = a_cs;
44 int c_rs_save = c_rs;
45 int c_cs_save = c_cs;
46 float zero_r = bl1_s0();
49 int lda, inca;
50 int ldc, incc;
53
54 // Return early if possible.
55 if ( bl1_zero_dim2( m, k ) ) return;
56
57 // If necessary, allocate, initialize, and use a temporary contiguous
58 // copy of each matrix rather than the original matrices.
60 m,
61 k,
63 &a, &a_rs, &a_cs );
64
66 m,
67 m,
69 &c, &c_rs, &c_cs );
70
71 // Initialize with values assuming column-major storage.
72 lda = a_cs;
73 inca = a_rs;
74 ldc = c_cs;
75 incc = c_rs;
76
77 // Adjust the parameters based on the storage of each matrix.
79 {
81 {
82 // requested operation: uplo( C_c ) += A_c * A_c'
83 // effective operation: uplo( C_c ) += A_c * A_c'
84 }
85 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
86 {
87 // requested operation: uplo( C_c ) += A_r * A_r'
88 // effective operation: uplo( C_c ) += conj( A_c' * A_c )
90
92
94 }
95 }
96 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
97 {
99 {
100 // requested operation: uplo( C_r ) += A_c * A_c'
101 // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
103
104 bl1_toggle_uplo( uplo );
105
107 }
108 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
109 {
110 // requested operation: uplo( C_r ) += A_r * A_r'
111 // effective operation: ~uplo( C_c ) += A_c' * A_c
114
115 bl1_toggle_uplo( uplo );
117 }
118 }
119
120 // There are two cases where we need to perform the rank-k product and
121 // then axpy the result into C with a conjugation. We handle those two
122 // cases here.
123 if ( herk_needs_conj )
124 {
125 // We need a temporary matrix for holding the rank-k product.
126 c_conj = bl1_callocm( m, m );
127 ldc_conj = m;
128 incc_conj = 1;
129
130 // Compute the rank-k product.
131 bl1_cherk_blas( uplo,
132 trans,
133 m,
134 k,
135 alpha,
136 a, lda,
137 &zero_r,
138 c_conj, ldc_conj );
139
140 // Scale C by beta.
141 bl1_csscalmr( uplo,
142 m,
143 m,
144 beta,
145 c, incc, ldc );
146
147 // And finally, accumulate the rank-k product in C_conj into C
148 // with a conjugation.
149 bl1_caxpymrt( uplo,
151 m,
152 m,
153 &one,
155 c, incc, ldc );
156
157 // Free the temporary matrix for C.
158 bl1_cfree( c_conj );
159 }
160 else
161 {
162 bl1_cherk_blas( uplo,
163 trans,
164 m,
165 k,
166 alpha,
167 a, lda,
168 beta,
169 c, ldc );
170 }
171
172 // Free any temporary contiguous matrices, copying the result back to
173 // the original matrix.
175 &a, &a_rs, &a_cs );
176
178 m_save,
179 m_save,
181 &c, &c_rs, &c_cs );
182}
void bl1_cherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
Definition bl1_herk.c:334

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cherk_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().

◆ bl1_cherk_blas()

void bl1_cherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  lda,
float *  beta,
scomplex *  c,
int  ldc 
)
335{
336#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
340
343
347 m,
348 k,
349 *alpha,
350 a, lda,
351 *beta,
352 c, ldc );
353#else
354 char blas_uplo;
355 char blas_trans;
356
359
361 &blas_trans,
362 &m,
363 &k,
364 alpha,
365 a, &lda,
366 beta,
367 c, &ldc );
368#endif
369}
void F77_cherk(char *uplo, char *transa, int *n, int *k, float *alpha, scomplex *a, int *lda, float *beta, scomplex *c, int *ldc)
void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().

Referenced by bl1_cherk().

◆ bl1_csymm()

void bl1_csymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
536{
537 int m_save = m;
538 int n_save = n;
539 scomplex* a_save = a;
540 scomplex* b_save = b;
541 scomplex* c_save = c;
542 int a_rs_save = a_rs;
543 int a_cs_save = a_cs;
544 int b_rs_save = b_rs;
545 int b_cs_save = b_cs;
546 int c_rs_save = c_rs;
547 int c_cs_save = c_cs;
548 scomplex zero = bl1_c0();
549 scomplex one = bl1_c1();
552 int dim_a;
553 int lda, inca;
554 int ldb, incb;
555 int ldc, incc;
556 int ldb_copy, incb_copy;
561
562 // Return early if possible.
563 if ( bl1_zero_dim2( m, n ) ) return;
564
565 // If necessary, allocate, initialize, and use a temporary contiguous
566 // copy of each matrix rather than the original matrices.
569 dim_a,
570 dim_a,
572 &a, &a_rs, &a_cs );
573
575 n,
577 &b, &b_rs, &b_cs );
578
580 n,
582 &c, &c_rs, &c_cs );
583
584 // Initialize with values assuming column-major storage.
585 lda = a_cs;
586 inca = a_rs;
587 ldb = b_cs;
588 incb = b_rs;
589 ldc = c_cs;
590 incc = c_rs;
591
592 // Adjust the parameters based on the storage of each matrix.
593 if ( bl1_is_col_storage( c_rs, c_cs ) )
594 {
595 if ( bl1_is_col_storage( a_rs, a_cs ) )
596 {
597 if ( bl1_is_col_storage( b_rs, b_cs ) )
598 {
599 // requested operation: C_c += uplo( A_c ) * B_c
600 // effective operation: C_c += uplo( A_c ) * B_c
601 }
602 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
603 {
604 // requested operation: C_c += uplo( A_c ) * B_r
605 // effective operation: C_c += uplo( A_c ) * B_c
607 }
608 }
609 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
610 {
611 if ( bl1_is_col_storage( b_rs, b_cs ) )
612 {
613 // requested operation: C_c += uplo( A_r ) * B_c
614 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
616
617 bl1_toggle_uplo( uplo );
618 }
619 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
620 {
621 // requested operation: C_c += uplo( A_r ) * B_r
622 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
625
627 bl1_toggle_uplo( uplo );
628
630 }
631 }
632 }
633 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
634 {
635 if ( bl1_is_col_storage( a_rs, a_cs ) )
636 {
637 if ( bl1_is_col_storage( b_rs, b_cs ) )
638 {
639 // requested operation: C_r += uplo( A_c ) * B_c
640 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
642
643 bl1_swap_ints( m, n );
644
646 }
647 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
648 {
649 // requested operation: C_r += uplo( A_c ) * B_r
650 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
653
654 bl1_swap_ints( m, n );
655
657 }
658 }
659 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
660 {
661 if ( bl1_is_col_storage( b_rs, b_cs ) )
662 {
663 // requested operation: C_r += uplo( A_r ) * B_c
664 // effective operation: C_c += B_c^T * ~uplo( A_c )
667
668 bl1_swap_ints( m, n );
669
671 bl1_toggle_uplo( uplo );
672
675 }
676 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
677 {
678 // requested operation: C_r += uplo( A_r ) * B_r
679 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
683
684 bl1_swap_ints( m, n );
685
686 bl1_toggle_uplo( uplo );
688 }
689 }
690 }
691
692 // We need a temporary matrix for the cases where B needs to be copied.
693 b_copy = b;
694 ldb_copy = ldb;
695 incb_copy = incb;
696
697 // There are two cases where we need to make a copy of B: one where the
698 // copy's dimensions are transposed from the original B, and one where
699 // the dimensions are not swapped.
700 if ( symm_needs_copyb )
701 {
703
704 // Set transb, which determines whether or not we need to copy from B
705 // as if it needs a transposition. If a transposition is needed, then
706 // m and n have already been swapped. So in either case m
707 // represents the leading dimension of the copy.
710
711 b_copy = bl1_callocm( m, n );
712 ldb_copy = m;
713 incb_copy = 1;
714
716 m,
717 n,
718 b, incb, ldb,
720 }
721
722 // There are two cases where we need to perform the symm and then axpy
723 // the result into C with a transposition. We handle those cases here.
724 if ( symm_needs_axpyt )
725 {
726 // We need a temporary matrix for holding C^T. Notice that m and n
727 // represent the dimensions of C, and thus C_trans is n-by-m
728 // (interpreting both as column-major matrices). So the leading
729 // dimension of the temporary matrix holding C^T is n.
730 c_trans = bl1_callocm( n, m );
731 ldc_trans = n;
732 incc_trans = 1;
733
734 // Compute A * B (or B * A) and store the result in C_trans.
735 // Note that there is no overlap between the axpyt cases and
736 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
738 uplo,
739 n,
740 m,
741 alpha,
742 a, lda,
743 b, ldb,
744 &zero,
746
747 // Scale C by beta.
749 m,
750 n,
751 beta,
752 c, incc, ldc );
753
754 // And finally, accumulate the matrix product in C_trans into C
755 // with a transpose.
757 m,
758 n,
759 &one,
761 c, incc, ldc );
762
763 // Free the temporary matrix for C.
765 }
766 else // no extra axpyt step needed
767 {
769 uplo,
770 m,
771 n,
772 alpha,
773 a, lda,
775 beta,
776 c, ldc );
777 }
778
779 if ( symm_needs_copyb )
780 bl1_cfree( b_copy );
781
782 // Free any temporary contiguous matrices, copying the result back to
783 // the original matrix.
785 &a, &a_rs, &a_cs );
786
788 &b, &b_rs, &b_cs );
789
791 n_save,
793 &c, &c_rs, &c_cs );
794}
void bl1_csymm_blas(side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition bl1_symm.c:1137

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_csymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

◆ bl1_csymm_blas()

void bl1_csymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)
1138{
1139#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1143
1146
1148 cblas_side,
1149 cblas_uplo,
1150 m,
1151 n,
1152 alpha,
1153 a, lda,
1154 b, ldb,
1155 beta,
1156 c, ldc );
1157#else
1158 char blas_side;
1159 char blas_uplo;
1160
1163
1165 &blas_uplo,
1166 &m,
1167 &n,
1168 alpha,
1169 a, &lda,
1170 b, &ldb,
1171 beta,
1172 c, &ldc );
1173#endif
1174}
void F77_csymm(char *side, char *uplo, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_csymm(), CblasColMajor, and F77_csymm().

Referenced by bl1_csymm().

◆ bl1_csyr2k()

void bl1_csyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
466{
467 uplo1_t uplo_save = uplo;
468 int m_save = m;
469 scomplex* a_save = a;
470 scomplex* b_save = b;
471 scomplex* c_save = c;
472 int a_rs_save = a_rs;
473 int a_cs_save = a_cs;
474 int b_rs_save = b_rs;
475 int b_cs_save = b_cs;
476 int c_rs_save = c_rs;
477 int c_cs_save = c_cs;
480 int lda, inca;
481 int ldb, incb;
482 int ldc, incc;
483 int lda_copy, inca_copy;
484 int ldb_copy, incb_copy;
487
488 // Return early if possible.
489 if ( bl1_zero_dim2( m, k ) ) return;
490
491 // If necessary, allocate, initialize, and use a temporary contiguous
492 // copy of each matrix rather than the original matrices.
494 m,
495 k,
497 &a, &a_rs, &a_cs );
498
500 m,
501 k,
503 &b, &b_rs, &b_cs );
504
506 m,
507 m,
509 &c, &c_rs, &c_cs );
510
511 // Initialize with values assuming column-major storage.
512 lda = a_cs;
513 inca = a_rs;
514 ldb = b_cs;
515 incb = b_rs;
516 ldc = c_cs;
517 incc = c_rs;
518
519 // Adjust the parameters based on the storage of each matrix.
520 if ( bl1_is_col_storage( c_rs, c_cs ) )
521 {
522 if ( bl1_is_col_storage( a_rs, a_cs ) )
523 {
524 if ( bl1_is_col_storage( b_rs, b_cs ) )
525 {
526 // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
527 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
528 }
529 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
530 {
531 // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
532 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
534 }
535 }
536 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
537 {
538 if ( bl1_is_col_storage( b_rs, b_cs ) )
539 {
540 // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
541 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
543 }
544 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
545 {
546 // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
547 // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
550
552 }
553 }
554 }
555 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
556 {
557 if ( bl1_is_col_storage( a_rs, a_cs ) )
558 {
559 if ( bl1_is_col_storage( b_rs, b_cs ) )
560 {
561 // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
562 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
564
565 bl1_toggle_uplo( uplo );
566 }
567 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
568 {
569 // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
570 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
572
574
575 bl1_toggle_uplo( uplo );
576 }
577 }
578 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
579 {
580 if ( bl1_is_col_storage( b_rs, b_cs ) )
581 {
582 // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
583 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
585
587
588 bl1_toggle_uplo( uplo );
589 }
590 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
591 {
592 // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
593 // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
597
598 bl1_toggle_uplo( uplo );
600 }
601 }
602 }
603
604 a_copy = a;
605 lda_copy = lda;
606 inca_copy = inca;
607
608 // There are two cases where we need to copy A to column-major storage.
609 // We handle those two cases here.
610 if ( syr2k_needs_copya )
611 {
612 int m_a;
613 int n_a;
614
615 // Determine the dimensions of A according to the value of trans. We
616 // need this in order to set the leading dimension of the copy of A.
618
619 // We need a temporary matrix to hold a column-major copy of A.
620 a_copy = bl1_callocm( m, k );
621 lda_copy = m_a;
622 inca_copy = 1;
623
624 // Copy the contents of A into A_copy.
626 m_a,
627 n_a,
628 a, inca, lda,
630 }
631
632 b_copy = b;
633 ldb_copy = ldb;
634 incb_copy = incb;
635
636 // There are two cases where we need to copy B to column-major storage.
637 // We handle those two cases here.
638 if ( syr2k_needs_copyb )
639 {
640 int m_b;
641 int n_b;
642
643 // Determine the dimensions of B according to the value of trans. We
644 // need this in order to set the leading dimension of the copy of B.
646
647 // We need a temporary matrix to hold a column-major copy of B.
648 b_copy = bl1_callocm( m, k );
649 ldb_copy = m_b;
650 incb_copy = 1;
651
652 // Copy the contents of B into B_copy.
654 m_b,
655 n_b,
656 b, incb, ldb,
658 }
659
660 bl1_csyr2k_blas( uplo,
661 trans,
662 m,
663 k,
664 alpha,
667 beta,
668 c, ldc );
669
670 if ( syr2k_needs_copya )
671 bl1_cfree( a_copy );
672
673 if ( syr2k_needs_copyb )
674 bl1_cfree( b_copy );
675
676 // Free any temporary contiguous matrices, copying the result back to
677 // the original matrix.
679 &a, &a_rs, &a_cs );
680
682 &b, &b_rs, &b_cs );
683
685 m_save,
686 m_save,
688 &c, &c_rs, &c_cs );
689}
void bl1_csyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
Definition bl1_syr2k.c:1013

References bl1_callocm(), bl1_ccopymt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_csyr2k_blas(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Syr2k_external().

◆ bl1_csyr2k_blas()

void bl1_csyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)
1014{
1015#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1019
1020 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1021 // so we have to map it down to regular transposition.
1023
1026
1028 cblas_uplo,
1030 m,
1031 k,
1032 alpha,
1033 a, lda,
1034 b, ldb,
1035 beta,
1036 c, ldc );
1037#else
1038 char blas_uplo;
1039 char blas_trans;
1040
1041 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1042 // so we have to map it down to regular transposition.
1044
1047
1049 &blas_trans,
1050 &m,
1051 &k,
1052 alpha,
1053 a, &lda,
1054 b, &ldb,
1055 beta,
1056 c, &ldc );
1057#endif
1058}
void F77_csyr2k(char *uplo, char *transa, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb, scomplex *beta, scomplex *c, int *ldc)
void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
int bl1_is_conjtrans(trans1_t trans)
Definition bl1_is.c:30

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_csyr2k(), CblasColMajor, and F77_csyr2k().

Referenced by bl1_csyr2k().

◆ bl1_csyrk()

void bl1_csyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
206{
207 uplo1_t uplo_save = uplo;
208 int m_save = m;
209 scomplex* a_save = a;
210 scomplex* c_save = c;
211 int a_rs_save = a_rs;
212 int a_cs_save = a_cs;
213 int c_rs_save = c_rs;
214 int c_cs_save = c_cs;
215 int lda, inca;
216 int ldc, incc;
217
218 // Return early if possible.
219 if ( bl1_zero_dim2( m, k ) ) return;
220
221 // If necessary, allocate, initialize, and use a temporary contiguous
222 // copy of each matrix rather than the original matrices.
224 m,
225 k,
227 &a, &a_rs, &a_cs );
228
230 m,
231 m,
233 &c, &c_rs, &c_cs );
234
235 // Initialize with values assuming column-major storage.
236 lda = a_cs;
237 inca = a_rs;
238 ldc = c_cs;
239 incc = c_rs;
240
241 // Adjust the parameters based on the storage of each matrix.
242 if ( bl1_is_col_storage( c_rs, c_cs ) )
243 {
244 if ( bl1_is_col_storage( a_rs, a_cs ) )
245 {
246 // requested operation: uplo( C_c ) += A_c * A_c^T
247 // effective operation: uplo( C_c ) += A_c * A_c^T
248 }
249 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
250 {
251 // requested operation: uplo( C_c ) += A_r * A_r^T
252 // effective operation: uplo( C_c ) += A_c^T * A_c
254
256 }
257 }
258 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
259 {
260 if ( bl1_is_col_storage( a_rs, a_cs ) )
261 {
262 // requested operation: uplo( C_r ) += A_c * A_c^T
263 // effective operation: ~uplo( C_c ) += A_c * A_c^T
265
266 bl1_toggle_uplo( uplo );
267 }
268 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
269 {
270 // requested operation: uplo( C_r ) += A_r * A_r^T
271 // effective operation: ~uplo( C_c ) += A_c^T * A_c
274
275 bl1_toggle_uplo( uplo );
277 }
278 }
279
280 bl1_csyrk_blas( uplo,
281 trans,
282 m,
283 k,
284 alpha,
285 a, lda,
286 beta,
287 c, ldc );
288
289 // Free any temporary contiguous matrices, copying the result back to
290 // the original matrix.
292 &a, &a_rs, &a_cs );
293
295 m_save,
296 m_save,
298 &c, &c_rs, &c_cs );
299}
void bl1_csyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *beta, scomplex *c, int ldc)
Definition bl1_syrk.c:473

References bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_csyrk_blas(), bl1_is_col_storage(), and bl1_zero_dim2().

Referenced by FLA_Syrk_external().

◆ bl1_csyrk_blas()

void bl1_csyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  beta,
scomplex *  c,
int  ldc 
)
474{
475#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
479
482
486 m,
487 k,
488 alpha,
489 a, lda,
490 beta,
491 c, ldc );
492#else
493 char blas_uplo;
494 char blas_trans;
495
498
500 &blas_trans,
501 &m,
502 &k,
503 alpha,
504 a, &lda,
505 beta,
506 c, &ldc );
507#endif
508}
void F77_csyrk(char *uplo, char *transa, int *n, int *k, scomplex *alpha, scomplex *a, int *lda, scomplex *beta, scomplex *c, int *ldc)
void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_csyrk(), CblasColMajor, and F77_csyrk().

Referenced by bl1_csyrk().

◆ bl1_ctrmm()

void bl1_ctrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
220{
221 int m_save = m;
222 int n_save = n;
223 scomplex* a_save = a;
224 scomplex* b_save = b;
225 int a_rs_save = a_rs;
226 int a_cs_save = a_cs;
227 int b_rs_save = b_rs;
228 int b_cs_save = b_cs;
230 int dim_a;
231 int lda, inca;
232 int ldb, incb;
233 int lda_conj, inca_conj;
234 int a_was_copied;
235
236 // Return early if possible.
237 if ( bl1_zero_dim2( m, n ) ) return;
238
239 // If necessary, allocate, initialize, and use a temporary contiguous
240 // copy of each matrix rather than the original matrices.
243 dim_a,
244 dim_a,
246 &a, &a_rs, &a_cs );
247
249 n,
251 &b, &b_rs, &b_cs );
252
253 // Figure out whether A was copied to contiguous memory. This is used to
254 // prevent redundant copying.
255 a_was_copied = ( a != a_save );
256
257 // Initialize with values assuming column-major storage.
258 lda = a_cs;
259 inca = a_rs;
260 ldb = b_cs;
261 incb = b_rs;
262
263 // Adjust the parameters based on the storage of each matrix.
264 if ( bl1_is_col_storage( b_rs, b_cs ) )
265 {
266 if ( bl1_is_col_storage( a_rs, a_cs ) )
267 {
268 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
269 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
270 }
271 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
272 {
273 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
274 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
276
277 bl1_toggle_uplo( uplo );
279 }
280 }
281 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
282 {
283 if ( bl1_is_col_storage( a_rs, a_cs ) )
284 {
285 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
286 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
288
289 bl1_swap_ints( m, n );
290
293 }
294 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
295 {
296 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
297 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
300
301 bl1_swap_ints( m, n );
302
303 bl1_toggle_uplo( uplo );
305 }
306 }
307
308 // Initialize with values assuming that trans is not conjnotrans.
309 a_conj = a;
310 lda_conj = lda;
311 inca_conj = inca;
312
313 // We want to handle the conjnotrans case. The easiest way to do so is
314 // by making a conjugated copy of A.
316 {
317 int dim_a;
318
320
322 lda_conj = dim_a;
323 inca_conj = 1;
324
325 bl1_ccopymrt( uplo,
327 dim_a,
328 dim_a,
329 a, inca, lda,
331 }
332 else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
333 {
334 int dim_a;
335
337
338 bl1_cconjmr( uplo,
339 dim_a,
340 dim_a,
342 }
343
344
346 uplo,
347 trans,
348 diag,
349 m,
350 n,
351 alpha,
353 b, ldb );
354
356 bl1_cfree( a_conj );
357
358 // Free any temporary contiguous matrices, copying the result back to
359 // the original matrix.
361 &a, &a_rs, &a_cs );
362
364 n_save,
366 &b, &b_rs, &b_cs );
367}
void bl1_ctrmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
Definition bl1_trmm.c:614

References bl1_callocm(), bl1_cconjmr(), bl1_ccopymrt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_ctrmm_blas(), bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ctrmmsx(), and FLA_Trmm_external().

◆ bl1_ctrmm_blas()

void bl1_ctrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  lda,
scomplex * b,
int  ldb 
)
615{
616#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
622
627
633 m,
634 n,
635 alpha,
636 a, lda,
637 b, ldb );
638#else
639 char blas_side;
640 char blas_uplo;
641 char blas_trans;
642 char blas_diag;
643
648
650 &blas_uplo,
651 &blas_trans,
652 &blas_diag,
653 &m,
654 &n,
655 alpha,
656 a, &lda,
657 b, &ldb );
658#endif
659}
void F77_ctrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb)
CBLAS_DIAG
Definition blis_prototypes_cblas.h:20
void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)
void bl1_param_map_to_netlib_diag(diag1_t blis_diag, void *blas_diag)
Definition bl1_param_map.c:95

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ctrmm(), CblasColMajor, and F77_ctrmm().

Referenced by bl1_ctrmm().

◆ bl1_ctrmmsx()

void bl1_ctrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs,
scomplex * beta,
scomplex * c,
int  c_rs,
int  c_cs 
)
226{
227 int m_save = m;
228 int n_save = n;
229 scomplex* a_save = a;
230 scomplex* b_save = b;
231 scomplex* c_save = c;
232 int a_rs_save = a_rs;
233 int a_cs_save = a_cs;
234 int b_rs_save = b_rs;
235 int b_cs_save = b_cs;
236 int c_rs_save = c_rs;
237 int c_cs_save = c_cs;
238 scomplex one = bl1_c1();
240 int dim_a;
241 int b_copy_rs, b_copy_cs;
242
243 // Return early if possible.
244 if ( bl1_zero_dim2( m, n ) ) return;
245
246 // If necessary, allocate, initialize, and use a temporary contiguous
247 // copy of each matrix rather than the original matrices.
250 dim_a,
251 dim_a,
253 &a, &a_rs, &a_cs );
254
256 n,
258 &b, &b_rs, &b_cs );
259
261 n,
263 &c, &c_rs, &c_cs );
264
265 // Create a copy of B to use in the computation so the original matrix is
266 // left untouched.
267 b_copy = bl1_callocm( m, n );
268
269 // Match the strides of B_copy to that of B.
270 if ( bl1_is_col_storage( b_rs, b_cs ) )
271 {
272 b_copy_rs = 1;
273 b_copy_cs = m;
274 }
275 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
276 {
277 b_copy_rs = n;
278 b_copy_cs = 1;
279 }
280
281 // Copy the contents of B to B_copy.
283 m,
284 n,
285 b, b_rs, b_cs,
287
288 // Perform the operation on B_copy.
290 uplo,
291 trans,
292 diag,
293 m,
294 n,
295 alpha,
296 a, a_rs, a_cs,
298
299 // Scale C by beta.
301 m,
302 n,
303 beta,
304 c, c_rs, c_cs );
305
306 // Add B_copy into C.
308 m,
309 n,
310 &one,
312 c, c_rs, c_cs );
313
314 // Free the copy of B.
315 bl1_cfree( b_copy );
316
317 // Free any temporary contiguous matrices, copying the result back to
318 // the original matrix.
320 &a, &a_rs, &a_cs );
321
323 &b, &b_rs, &b_cs );
324
326 n_save,
328 &c, &c_rs, &c_cs );
329}
void bl1_ctrmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_trmm.c:219

References bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_ctrmm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

◆ bl1_ctrsm()

void bl1_ctrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs 
)
220{
221 int m_save = m;
222 int n_save = n;
223 scomplex* a_save = a;
224 scomplex* b_save = b;
225 int a_rs_save = a_rs;
226 int a_cs_save = a_cs;
227 int b_rs_save = b_rs;
228 int b_cs_save = b_cs;
230 int dim_a;
231 int lda, inca;
232 int ldb, incb;
233 int lda_conj, inca_conj;
234 int a_was_copied;
235
236 // Return early if possible.
237 if ( bl1_zero_dim2( m, n ) ) return;
238
239 // If necessary, allocate, initialize, and use a temporary contiguous
240 // copy of each matrix rather than the original matrices.
243 dim_a,
244 dim_a,
246 &a, &a_rs, &a_cs );
247
249 n,
251 &b, &b_rs, &b_cs );
252
253 // Figure out whether A was copied to contiguous memory. This is used to
254 // prevent redundant copying.
255 a_was_copied = ( a != a_save );
256
257 // Initialize with values assuming column-major storage.
258 lda = a_cs;
259 inca = a_rs;
260 ldb = b_cs;
261 incb = b_rs;
262
263 // Adjust the parameters based on the storage of each matrix.
264 if ( bl1_is_col_storage( b_rs, b_cs ) )
265 {
266 if ( bl1_is_col_storage( a_rs, a_cs ) )
267 {
268 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
269 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
270 }
271 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
272 {
273 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
274 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
276
277 bl1_toggle_uplo( uplo );
279 }
280 }
281 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
282 {
283 if ( bl1_is_col_storage( a_rs, a_cs ) )
284 {
285 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
286 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
288
289 bl1_swap_ints( m, n );
290
293 }
294 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
295 {
296 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
297 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
300
301 bl1_swap_ints( m, n );
302
303 bl1_toggle_uplo( uplo );
305 }
306 }
307
308 // Initialize with values assuming that trans is not conjnotrans.
309 a_conj = a;
310 lda_conj = lda;
311 inca_conj = inca;
312
313 // We want to handle the conjnotrans case. The easiest way to do so is
314 // by making a conjugated copy of A.
316 {
317 int dim_a;
318
320
322 lda_conj = dim_a;
323 inca_conj = 1;
324
325 bl1_ccopymrt( uplo,
327 dim_a,
328 dim_a,
329 a, inca, lda,
331 }
332 else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
333 {
334 int dim_a;
335
337
338 bl1_cconjmr( uplo,
339 dim_a,
340 dim_a,
342 }
343
344
346 uplo,
347 trans,
348 diag,
349 m,
350 n,
351 alpha,
353 b, ldb );
354
356 bl1_cfree( a_conj );
357
358 // Free any temporary contiguous matrices, copying the result back to
359 // the original matrix.
361 &a, &a_rs, &a_cs );
362
364 n_save,
366 &b, &b_rs, &b_cs );
367}
void bl1_ctrsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
Definition bl1_trsm.c:614

References bl1_callocm(), bl1_cconjmr(), bl1_ccopymrt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_ctrsm_blas(), bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ctrsmsx(), FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_piv_opc_var3(), and FLA_Trsm_external().

◆ bl1_ctrsm_blas()

void bl1_ctrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  lda,
scomplex * b,
int  ldb 
)
615{
616#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
622
627
633 m,
634 n,
635 alpha,
636 a, lda,
637 b, ldb );
638#else
639 char blas_side;
640 char blas_uplo;
641 char blas_trans;
642 char blas_diag;
643
648
650 &blas_uplo,
651 &blas_trans,
652 &blas_diag,
653 &m,
654 &n,
655 alpha,
656 a, &lda,
657 b, &ldb );
658#endif
659}
void F77_ctrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, scomplex *alpha, scomplex *a, int *lda, scomplex *b, int *ldb)
void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ctrsm(), CblasColMajor, and F77_ctrsm().

Referenced by bl1_ctrsm().

◆ bl1_ctrsmsx()

void bl1_ctrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex * alpha,
scomplex * a,
int  a_rs,
int  a_cs,
scomplex * b,
int  b_rs,
int  b_cs,
scomplex * beta,
scomplex * c,
int  c_rs,
int  c_cs 
)
226{
227 int m_save = m;
228 int n_save = n;
229 scomplex* a_save = a;
230 scomplex* b_save = b;
231 scomplex* c_save = c;
232 int a_rs_save = a_rs;
233 int a_cs_save = a_cs;
234 int b_rs_save = b_rs;
235 int b_cs_save = b_cs;
236 int c_rs_save = c_rs;
237 int c_cs_save = c_cs;
238 scomplex one = bl1_c1();
240 int dim_a;
241 int b_copy_rs, b_copy_cs;
242
243 // Return early if possible.
244 if ( bl1_zero_dim2( m, n ) ) return;
245
246 // If necessary, allocate, initialize, and use a temporary contiguous
247 // copy of each matrix rather than the original matrices.
250 dim_a,
251 dim_a,
253 &a, &a_rs, &a_cs );
254
256 n,
258 &b, &b_rs, &b_cs );
259
261 n,
263 &c, &c_rs, &c_cs );
264
265 // Create a copy of B to use in the computation so the original matrix is
266 // left untouched.
267 b_copy = bl1_callocm( m, n );
268
269 // Match the strides of B_copy to that of B.
270 if ( bl1_is_col_storage( b_rs, b_cs ) )
271 {
272 b_copy_rs = 1;
273 b_copy_cs = m;
274 }
275 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
276 {
277 b_copy_rs = n;
278 b_copy_cs = 1;
279 }
280
281 // Copy the contents of B to B_copy.
283 m,
284 n,
285 b, b_rs, b_cs,
287
288 // Perform the operation on B_copy.
290 uplo,
291 trans,
292 diag,
293 m,
294 n,
295 alpha,
296 a, a_rs, a_cs,
298
299 // Scale C by beta.
301 m,
302 n,
303 beta,
304 c, c_rs, c_cs );
305
306 // Add B_copy into C.
308 m,
309 n,
310 &one,
312 c, c_rs, c_cs );
313
314 // Free the copy of B.
315 bl1_cfree( b_copy );
316
317 // Free any temporary contiguous matrices, copying the result back to
318 // the original matrix.
320 &a, &a_rs, &a_cs );
321
323 &b, &b_rs, &b_cs );
324
326 n_save,
328 &c, &c_rs, &c_cs );
329}
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_trsm.c:219

References bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_ctrsm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

◆ bl1_dgemm()

void bl1_dgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
275{
276 int m_save = m;
277 int n_save = n;
278 double* a_save = a;
279 double* b_save = b;
280 double* c_save = c;
281 int a_rs_save = a_rs;
282 int a_cs_save = a_cs;
283 int b_rs_save = b_rs;
284 int b_cs_save = b_cs;
285 int c_rs_save = c_rs;
286 int c_cs_save = c_cs;
287 double zero = bl1_d0();
288 double one = bl1_d1();
289 double* a_unswap;
290 double* b_unswap;
291 double* c_trans;
292 int lda, inca;
293 int ldb, incb;
294 int ldc, incc;
296 int m_gemm, n_gemm;
298
299 // Return early if possible.
300 if ( bl1_zero_dim3( m, k, n ) )
301 {
303 m,
304 n,
305 beta,
306 c, c_rs, c_cs );
307 return;
308 }
309
310 // If necessary, allocate, initialize, and use a temporary contiguous
311 // copy of each matrix rather than the original matrices.
313 m,
314 k,
316 &a, &a_rs, &a_cs );
317
319 k,
320 n,
322 &b, &b_rs, &b_cs );
323
325 n,
327 &c, &c_rs, &c_cs );
328
329 // These are used to track the original values of a and b prior to any
330 // operand swapping that might take place. This is necessary for proper
331 // freeing of memory when one is a temporary contiguous matrix.
332 a_unswap = a;
333 b_unswap = b;
334
335 // These are used to track the dimensions of the product of the
336 // A and B operands to the BLAS invocation of gemm. These differ
337 // from m and n when the operands need to be swapped.
338 m_gemm = m;
339 n_gemm = n;
340
341 // Initialize with values assuming column-major storage.
342 lda = a_cs;
343 inca = a_rs;
344 ldb = b_cs;
345 incb = b_rs;
346 ldc = c_cs;
347 incc = c_rs;
348
349 // Adjust the parameters based on the storage of each matrix.
350 if ( bl1_is_col_storage( c_rs, c_cs ) )
351 {
352 if ( bl1_is_col_storage( a_rs, a_cs ) )
353 {
354 if ( bl1_is_col_storage( b_rs, b_cs ) )
355 {
356 // requested operation: C_c += tr( A_c ) * tr( B_c )
357 // effective operation: C_c += tr( A_c ) * tr( B_c )
358 }
359 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
360 {
361
362 // requested operation: C_c += tr( A_c ) * tr( B_r )
363 // effective operation: C_c += tr( A_c ) * tr( B_c )^T
365
367 }
368 }
369 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
370 {
371 if ( bl1_is_col_storage( b_rs, b_cs ) )
372 {
373 // requested operation: C_c += tr( A_r ) * tr( B_c )
374 // effective operation: C_c += tr( A_r )^T * tr( B_c )
376
378 }
379 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
380 {
381 // requested operation: C_c += tr( A_r ) * tr( B_r )
382 // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
385
390
393 }
394 }
395 }
396 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
397 {
398 if ( bl1_is_col_storage( a_rs, a_cs ) )
399 {
400 if ( bl1_is_col_storage( b_rs, b_cs ) )
401 {
402 // requested operation: C_r += tr( A_c ) * tr( B_c )
403 // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
405
406 bl1_swap_ints( m, n );
407
409 }
410 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
411 {
412 // requested operation: C_r += tr( A_c ) * tr( B_r )
413 // effective operation: C_c += tr( B_c ) * tr( A_c )^T
416
418
419 bl1_swap_ints( m, n );
425 }
426 }
427 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
428 {
429 if ( bl1_is_col_storage( b_rs, b_cs ) )
430 {
431 // requested operation: C_r += tr( A_r ) * tr( B_c )
432 // effective operation: C_c += tr( B_c )^T * tr( A_c )
435
437
438 bl1_swap_ints( m, n );
444 }
445 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
446 {
447 // requested operation: C_r += tr( A_r ) * tr( B_r )
448 // effective operation: C_c += tr( B_c ) * tr( A_c )
452
453 bl1_swap_ints( m, n );
459 }
460 }
461 }
462
463 // There are two cases where we need to perform the gemm and then axpy
464 // the result into C with a transposition. We handle those cases here.
465 if ( gemm_needs_axpyt )
466 {
467 // We need a temporary matrix for holding C^T. Notice that m and n
468 // represent the dimensions of C, while m_gemm and n_gemm are the
469 // dimensions of the actual product op(A)*op(B), which may be n-by-m
470 // since the operands may have been swapped.
473 incc_trans = 1;
474
475 // Compute tr( A ) * tr( B ), where A and B may have been swapped
476 // to reference the other, and store the result in C_trans.
478 transb,
479 m_gemm,
480 n_gemm,
481 k,
482 alpha,
483 a, lda,
484 b, ldb,
485 &zero,
487
488 // Scale C by beta.
490 m,
491 n,
492 beta,
493 c, incc, ldc );
494
495 // And finally, accumulate the matrix product in C_trans into C
496 // with a transpose.
498 m,
499 n,
500 &one,
502 c, incc, ldc );
503
504 // Free the temporary matrix for C.
506 }
507 else // no extra axpyt step needed
508 {
510 transb,
511 m_gemm,
512 n_gemm,
513 k,
514 alpha,
515 a, lda,
516 b, ldb,
517 beta,
518 c, ldc );
519 }
520
521 // Free any temporary contiguous matrices, copying the result back to
522 // the original matrix.
524 &a_unswap, &a_rs, &a_cs );
525
527 &b_unswap, &b_rs, &b_cs );
528
530 n_save,
532 &c, &c_rs, &c_cs );
533}
void bl1_daxpymt(trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:81
void bl1_dgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition bl1_gemm.c:1254
void bl1_dscalm(conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
Definition bl1_scalm.c:65
void bl1_dfree_contigm(double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:29
void bl1_dcreate_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:47
void bl1_dfree_saved_contigm(int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:36
void bl1_dcreate_contigmt(trans1_t trans_dims, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmt.c:51
double bl1_d0(void)
Definition bl1_constants.c:118
void bl1_dfree(double *p)
Definition bl1_free.c:35
double * bl1_dallocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:35
double bl1_d1(void)
Definition bl1_constants.c:54

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dgemm_blas(), bl1_dscalm(), bl1_is_col_storage(), bl1_zero_dim3(), BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Gemm_external(), FLA_Tevd_v_opd_var2(), and FLA_Tevd_v_opz_var2().

◆ bl1_dgemm_blas()

void bl1_dgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
double * alpha,
double * a,
int  lda,
double * b,
int  ldb,
double * beta,
double * c,
int  ldc 
)
1255{
1256#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1260
1263
1267 m,
1268 n,
1269 k,
1270 *alpha,
1271 a, lda,
1272 b, ldb,
1273 *beta,
1274 c, ldc );
1275#else
1276 char blas_transa;
1277 char blas_transb;
1278
1281
1283 &blas_transb,
1284 &m,
1285 &n,
1286 &k,
1287 alpha,
1288 a, &lda,
1289 b, &ldb,
1290 beta,
1291 c, &ldc );
1292#endif
1293}
void F77_dgemm(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)

References bl1_param_map_to_netlib_trans(), cblas_dgemm(), CblasColMajor, and F77_dgemm().

Referenced by bl1_dgemm().

◆ bl1_dhemm()

void bl1_dhemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
26{
28 uplo,
29 m,
30 n,
31 alpha,
32 a, a_rs, a_cs,
33 b, b_rs, b_cs,
34 beta,
35 c, c_rs, c_cs );
36}
void bl1_dsymm(side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_symm.c:274

References bl1_dsymm().

◆ bl1_dher2k()

void bl1_dher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
27{
28 bl1_dsyr2k( uplo,
29 trans,
30 m,
31 k,
32 alpha,
33 a, a_rs, a_cs,
34 b, b_rs, b_cs,
35 beta,
36 c, c_rs, c_cs );
37}
void bl1_dsyr2k(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_syr2k.c:239

References bl1_dsyr2k().

◆ bl1_dherk()

void bl1_dherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
25{
26 bl1_dsyrk( uplo,
27 trans,
28 m,
29 k,
30 alpha,
31 a, a_rs, a_cs,
32 beta,
33 c, c_rs, c_cs );
34}
void bl1_dsyrk(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_syrk.c:109

References bl1_dsyrk().

◆ bl1_dsymm()

void bl1_dsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
275{
276 int m_save = m;
277 int n_save = n;
278 double* a_save = a;
279 double* b_save = b;
280 double* c_save = c;
281 int a_rs_save = a_rs;
282 int a_cs_save = a_cs;
283 int b_rs_save = b_rs;
284 int b_cs_save = b_cs;
285 int c_rs_save = c_rs;
286 int c_cs_save = c_cs;
287 double zero = bl1_d0();
288 double one = bl1_d1();
289 double* b_copy;
290 double* c_trans;
291 int dim_a;
292 int lda, inca;
293 int ldb, incb;
294 int ldc, incc;
295 int ldb_copy, incb_copy;
300
301 // Return early if possible.
302 if ( bl1_zero_dim2( m, n ) ) return;
303
304 // If necessary, allocate, initialize, and use a temporary contiguous
305 // copy of each matrix rather than the original matrices.
308 dim_a,
309 dim_a,
311 &a, &a_rs, &a_cs );
312
314 n,
316 &b, &b_rs, &b_cs );
317
319 n,
321 &c, &c_rs, &c_cs );
322
323 // Initialize with values assuming column-major storage.
324 lda = a_cs;
325 inca = a_rs;
326 ldb = b_cs;
327 incb = b_rs;
328 ldc = c_cs;
329 incc = c_rs;
330
331 // Adjust the parameters based on the storage of each matrix.
332 if ( bl1_is_col_storage( c_rs, c_cs ) )
333 {
334 if ( bl1_is_col_storage( a_rs, a_cs ) )
335 {
336 if ( bl1_is_col_storage( b_rs, b_cs ) )
337 {
338 // requested operation: C_c += uplo( A_c ) * B_c
339 // effective operation: C_c += uplo( A_c ) * B_c
340 }
341 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
342 {
343 // requested operation: C_c += uplo( A_c ) * B_r
344 // effective operation: C_c += uplo( A_c ) * B_c
346 }
347 }
348 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
349 {
350 if ( bl1_is_col_storage( b_rs, b_cs ) )
351 {
352 // requested operation: C_c += uplo( A_r ) * B_c
353 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
355
356 bl1_toggle_uplo( uplo );
357 }
358 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
359 {
360 // requested operation: C_c += uplo( A_r ) * B_r
361 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
364
366 bl1_toggle_uplo( uplo );
367
369 }
370 }
371 }
372 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
373 {
374 if ( bl1_is_col_storage( a_rs, a_cs ) )
375 {
376 if ( bl1_is_col_storage( b_rs, b_cs ) )
377 {
378 // requested operation: C_r += uplo( A_c ) * B_c
379 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
381
382 bl1_swap_ints( m, n );
383
385 }
386 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
387 {
388 // requested operation: C_r += uplo( A_c ) * B_r
389 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
392
393 bl1_swap_ints( m, n );
394
396 }
397 }
398 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
399 {
400 if ( bl1_is_col_storage( b_rs, b_cs ) )
401 {
402 // requested operation: C_r += uplo( A_r ) * B_c
403 // effective operation: C_c += B_c^T * ~uplo( A_c )
406
407 bl1_swap_ints( m, n );
408
410 bl1_toggle_uplo( uplo );
411
414 }
415 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
416 {
417 // requested operation: C_r += uplo( A_r ) * B_r
418 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
422
423 bl1_swap_ints( m, n );
424
425 bl1_toggle_uplo( uplo );
427 }
428 }
429 }
430
431 // We need a temporary matrix for the cases where B needs to be copied.
432 b_copy = b;
433 ldb_copy = ldb;
434 incb_copy = incb;
435
436 // There are two cases where we need to make a copy of B: one where the
437 // copy's dimensions are transposed from the original B, and one where
438 // the dimensions are not swapped.
439 if ( symm_needs_copyb )
440 {
442
443 // Set transb, which determines whether or not we need to copy from B
444 // as if it needs a transposition. If a transposition is needed, then
445 // m and n have already been swapped. So in either case m
446 // represents the leading dimension of the copy.
449
450 b_copy = bl1_dallocm( m, n );
451 ldb_copy = m;
452 incb_copy = 1;
453
455 m,
456 n,
457 b, incb, ldb,
459 }
460
461 // There are two cases where we need to perform the symm and then axpy
462 // the result into C with a transposition. We handle those cases here.
463 if ( symm_needs_axpyt )
464 {
465 // We need a temporary matrix for holding C^T. Notice that m and n
466 // represent the dimensions of C, and thus C_trans is n-by-m
467 // (interpreting both as column-major matrices). So the leading
468 // dimension of the temporary matrix holding C^T is n.
469 c_trans = bl1_dallocm( n, m );
470 ldc_trans = n;
471 incc_trans = 1;
472
473 // Compute A * B (or B * A) and store the result in C_trans.
474 // Note that there is no overlap between the axpyt cases and
475 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
477 uplo,
478 n,
479 m,
480 alpha,
481 a, lda,
482 b, ldb,
483 &zero,
485
486 // Scale C by beta.
488 m,
489 n,
490 beta,
491 c, incc, ldc );
492
493 // And finally, accumulate the matrix product in C_trans into C
494 // with a transpose.
496 m,
497 n,
498 &one,
500 c, incc, ldc );
501
502 // Free the temporary matrix for C.
504 }
505 else // no extra axpyt step needed
506 {
508 uplo,
509 m,
510 n,
511 alpha,
512 a, lda,
514 beta,
515 c, ldc );
516 }
517
518 if ( symm_needs_copyb )
519 bl1_dfree( b_copy );
520
521 // Free any temporary contiguous matrices, copying the result back to
522 // the original matrix.
524 &a, &a_rs, &a_cs );
525
527 &b, &b_rs, &b_cs );
528
530 n_save,
532 &c, &c_rs, &c_cs );
533}
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_copymt.c:148
void bl1_dsymm_blas(side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition bl1_symm.c:1098
void bl1_dcreate_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:45

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dsymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().

◆ bl1_dsymm_blas()

void bl1_dsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double * alpha,
double * a,
int  lda,
double * b,
int  ldb,
double * beta,
double * c,
int  ldc 
)
1099{
1100#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1104
1107
1109 cblas_side,
1110 cblas_uplo,
1111 m,
1112 n,
1113 *alpha,
1114 a, lda,
1115 b, ldb,
1116 *beta,
1117 c, ldc );
1118#else
1119 char blas_side;
1120 char blas_uplo;
1121
1124
1126 &blas_uplo,
1127 &m,
1128 &n,
1129 alpha,
1130 a, &lda,
1131 b, &ldb,
1132 beta,
1133 c, &ldc );
1134#endif
1135}
void F77_dsymm(char *side, char *uplo, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor, and F77_dsymm().

Referenced by bl1_dsymm().

◆ bl1_dsyr2k()

void bl1_dsyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
240{
241 uplo1_t uplo_save = uplo;
242 int m_save = m;
243 double* a_save = a;
244 double* b_save = b;
245 double* c_save = c;
246 int a_rs_save = a_rs;
247 int a_cs_save = a_cs;
248 int b_rs_save = b_rs;
249 int b_cs_save = b_cs;
250 int c_rs_save = c_rs;
251 int c_cs_save = c_cs;
252 double* a_copy;
253 double* b_copy;
254 int lda, inca;
255 int ldb, incb;
256 int ldc, incc;
257 int lda_copy, inca_copy;
258 int ldb_copy, incb_copy;
261
262 // Return early if possible.
263 if ( bl1_zero_dim2( m, k ) ) return;
264
265 // If necessary, allocate, initialize, and use a temporary contiguous
266 // copy of each matrix rather than the original matrices.
268 m,
269 k,
271 &a, &a_rs, &a_cs );
272
274 m,
275 k,
277 &b, &b_rs, &b_cs );
278
280 m,
281 m,
283 &c, &c_rs, &c_cs );
284
285 // Initialize with values assuming column-major storage.
286 lda = a_cs;
287 inca = a_rs;
288 ldb = b_cs;
289 incb = b_rs;
290 ldc = c_cs;
291 incc = c_rs;
292
293 // Adjust the parameters based on the storage of each matrix.
294 if ( bl1_is_col_storage( c_rs, c_cs ) )
295 {
296 if ( bl1_is_col_storage( a_rs, a_cs ) )
297 {
298 if ( bl1_is_col_storage( b_rs, b_cs ) )
299 {
300 // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
301 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
302 }
303 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
304 {
305 // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
306 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
308 }
309 }
310 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
311 {
312 if ( bl1_is_col_storage( b_rs, b_cs ) )
313 {
314 // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
315 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
317 }
318 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
319 {
320 // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
321 // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
324
326 }
327 }
328 }
329 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
330 {
331 if ( bl1_is_col_storage( a_rs, a_cs ) )
332 {
333 if ( bl1_is_col_storage( b_rs, b_cs ) )
334 {
335 // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
336 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
338
339 bl1_toggle_uplo( uplo );
340 }
341 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
342 {
343 // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
344 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
346
348
349 bl1_toggle_uplo( uplo );
350 }
351 }
352 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
353 {
354 if ( bl1_is_col_storage( b_rs, b_cs ) )
355 {
356 // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
357 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
359
361
362 bl1_toggle_uplo( uplo );
363 }
364 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
365 {
366 // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
367 // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
371
372 bl1_toggle_uplo( uplo );
374 }
375 }
376 }
377
378 a_copy = a;
379 lda_copy = lda;
380 inca_copy = inca;
381
382 // There are two cases where we need to copy A to column-major storage.
383 // We handle those two cases here.
384 if ( syr2k_needs_copya )
385 {
386 int m_a;
387 int n_a;
388
389 // Determine the dimensions of A according to the value of trans. We
390 // need this in order to set the leading dimension of the copy of A.
392
393 // We need a temporary matrix to hold a column-major copy of A.
394 a_copy = bl1_dallocm( m, k );
395 lda_copy = m_a;
396 inca_copy = 1;
397
398 // Copy the contents of A into A_copy.
400 m_a,
401 n_a,
402 a, inca, lda,
404 }
405
406 b_copy = b;
407 ldb_copy = ldb;
408 incb_copy = incb;
409
410 // There are two cases where we need to copy B to column-major storage.
411 // We handle those two cases here.
412 if ( syr2k_needs_copyb )
413 {
414 int m_b;
415 int n_b;
416
417 // Determine the dimensions of B according to the value of trans. We
418 // need this in order to set the leading dimension of the copy of B.
420
421 // We need a temporary matrix to hold a column-major copy of B.
422 b_copy = bl1_dallocm( m, k );
423 ldb_copy = m_b;
424 incb_copy = 1;
425
426 // Copy the contents of B into B_copy.
428 m_b,
429 n_b,
430 b, incb, ldb,
432 }
433
434 bl1_dsyr2k_blas( uplo,
435 trans,
436 m,
437 k,
438 alpha,
441 beta,
442 c, ldc );
443
444 if ( syr2k_needs_copya )
445 bl1_dfree( a_copy );
446
447 if ( syr2k_needs_copyb )
448 bl1_dfree( b_copy );
449
450 // Free any temporary contiguous matrices, copying the result back to
451 // the original matrix.
453 &a, &a_rs, &a_cs );
454
456 &b, &b_rs, &b_cs );
457
459 m_save,
460 m_save,
462 &c, &c_rs, &c_cs );
463}
void bl1_dsyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
Definition bl1_syr2k.c:966
void bl1_dfree_saved_contigmr(uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigmr.c:36

References bl1_dallocm(), bl1_dcopymt(), bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigmr(), bl1_dsyr2k_blas(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().

◆ bl1_dsyr2k_blas()

void bl1_dsyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
double * a,
int  lda,
double * b,
int  ldb,
double * beta,
double * c,
int  ldc 
)
967{
968#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
972
973 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
974 // so we have to map it down to regular transposition.
976
979
983 m,
984 k,
985 *alpha,
986 a, lda,
987 b, ldb,
988 *beta,
989 c, ldc );
990#else
991 char blas_uplo;
992 char blas_trans;
993
994 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
995 // so we have to map it down to regular transposition.
997
1000
1002 &blas_trans,
1003 &m,
1004 &k,
1005 alpha,
1006 a, &lda,
1007 b, &ldb,
1008 beta,
1009 c, &ldc );
1010#endif
1011}
void F77_dsyr2k(char *uplo, char *transa, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_dsyr2k(), CblasColMajor, and F77_dsyr2k().

Referenced by bl1_dsyr2k().

◆ bl1_dsyrk()

void bl1_dsyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
110{
111 uplo1_t uplo_save = uplo;
112 int m_save = m;
113 double* a_save = a;
114 double* c_save = c;
115 int a_rs_save = a_rs;
116 int a_cs_save = a_cs;
117 int c_rs_save = c_rs;
118 int c_cs_save = c_cs;
119 int lda, inca;
120 int ldc, incc;
121
122 // Return early if possible.
123 if ( bl1_zero_dim2( m, k ) ) return;
124
125 // If necessary, allocate, initialize, and use a temporary contiguous
126 // copy of each matrix rather than the original matrices.
128 m,
129 k,
131 &a, &a_rs, &a_cs );
132
134 m,
135 m,
137 &c, &c_rs, &c_cs );
138
139 // Initialize with values assuming column-major storage.
140 lda = a_cs;
141 inca = a_rs;
142 ldc = c_cs;
143 incc = c_rs;
144
145 // Adjust the parameters based on the storage of each matrix.
146 if ( bl1_is_col_storage( c_rs, c_cs ) )
147 {
148 if ( bl1_is_col_storage( a_rs, a_cs ) )
149 {
150 // requested operation: uplo( C_c ) += A_c * A_c^T
151 // effective operation: uplo( C_c ) += A_c * A_c^T
152 }
153 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
154 {
155 // requested operation: uplo( C_c ) += A_r * A_r^T
156 // effective operation: uplo( C_c ) += A_c^T * A_c
158
160 }
161 }
162 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
163 {
164 if ( bl1_is_col_storage( a_rs, a_cs ) )
165 {
166 // requested operation: uplo( C_r ) += A_c * A_c^T
167 // effective operation: ~uplo( C_c ) += A_c * A_c^T
169
170 bl1_toggle_uplo( uplo );
171 }
172 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
173 {
174 // requested operation: uplo( C_r ) += A_r * A_r^T
175 // effective operation: ~uplo( C_c ) += A_c^T * A_c
178
179 bl1_toggle_uplo( uplo );
181 }
182 }
183
184 bl1_dsyrk_blas( uplo,
185 trans,
186 m,
187 k,
188 alpha,
189 a, lda,
190 beta,
191 c, ldc );
192
193 // Free any temporary contiguous matrices, copying the result back to
194 // the original matrix.
196 &a, &a_rs, &a_cs );
197
199 m_save,
200 m_save,
202 &c, &c_rs, &c_cs );
203}
void bl1_dsyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *beta, double *c, int ldc)
Definition bl1_syrk.c:436

References bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_dfree_contigm(), bl1_dfree_saved_contigmr(), bl1_dsyrk_blas(), bl1_is_col_storage(), and bl1_zero_dim2().

Referenced by bl1_dherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_opd_var1().

◆ bl1_dsyrk_blas()

void bl1_dsyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
double * a,
int  lda,
double * beta,
double * c,
int  ldc 
)
437{
438#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
442
445
449 m,
450 k,
451 *alpha,
452 a, lda,
453 *beta,
454 c, ldc );
455#else
456 char blas_uplo;
457 char blas_trans;
458
461
463 &blas_trans,
464 &m,
465 &k,
466 alpha,
467 a, &lda,
468 beta,
469 c, &ldc );
470#endif
471}
void F77_dsyrk(char *uplo, char *transa, int *n, int *k, double *alpha, double *a, int *lda, double *beta, double *c, int *ldc)
void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dsyrk(), CblasColMajor, and F77_dsyrk().

Referenced by bl1_dsyrk().

◆ bl1_dtrmm()

void bl1_dtrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
117{
118 int m_save = m;
119 int n_save = n;
120 double* a_save = a;
121 double* b_save = b;
122 int a_rs_save = a_rs;
123 int a_cs_save = a_cs;
124 int b_rs_save = b_rs;
125 int b_cs_save = b_cs;
126 int dim_a;
127 int lda, inca;
128 int ldb, incb;
129
130 // Return early if possible.
131 if ( bl1_zero_dim2( m, n ) ) return;
132
133 // If necessary, allocate, initialize, and use a temporary contiguous
134 // copy of each matrix rather than the original matrices.
137 dim_a,
138 dim_a,
140 &a, &a_rs, &a_cs );
141
143 n,
145 &b, &b_rs, &b_cs );
146
147 // Initialize with values assuming column-major storage.
148 lda = a_cs;
149 inca = a_rs;
150 ldb = b_cs;
151 incb = b_rs;
152
153 // Adjust the parameters based on the storage of each matrix.
154 if ( bl1_is_col_storage( b_rs, b_cs ) )
155 {
156 if ( bl1_is_col_storage( a_rs, a_cs ) )
157 {
158 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
159 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
160 }
161 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
162 {
163 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
164 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
166
167 bl1_toggle_uplo( uplo );
169 }
170 }
171 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
172 {
173 if ( bl1_is_col_storage( a_rs, a_cs ) )
174 {
175 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
176 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
178
179 bl1_swap_ints( m, n );
180
183 }
184 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
185 {
186 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
187 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
190
191 bl1_swap_ints( m, n );
192
193 bl1_toggle_uplo( uplo );
195 }
196 }
197
199 uplo,
200 trans,
201 diag,
202 m,
203 n,
204 alpha,
205 a, lda,
206 b, ldb );
207
208 // Free any temporary contiguous matrices, copying the result back to
209 // the original matrix.
211 &a, &a_rs, &a_cs );
212
214 n_save,
216 &b, &b_rs, &b_cs );
217}
void bl1_dtrmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
Definition bl1_trmm.c:567

References bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dtrmm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), and bl1_zero_dim2().

Referenced by bl1_dtrmmsx(), and FLA_Trmm_external().

◆ bl1_dtrmm_blas()

void bl1_dtrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double * alpha,
double * a,
int  lda,
double * b,
int  ldb 
)
568{
569#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
575
580
586 m,
587 n,
588 *alpha,
589 a, lda,
590 b, ldb );
591#else
592 char blas_side;
593 char blas_uplo;
594 char blas_trans;
595 char blas_diag;
596
601
603 &blas_uplo,
604 &blas_trans,
605 &blas_diag,
606 &m,
607 &n,
608 alpha,
609 a, &lda,
610 b, &ldb );
611#endif
612}
void F77_dtrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb)
void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dtrmm(), CblasColMajor, and F77_dtrmm().

Referenced by bl1_dtrmm().

◆ bl1_dtrmmsx()

void bl1_dtrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
120{
121 int m_save = m;
122 int n_save = n;
123 double* a_save = a;
124 double* b_save = b;
125 double* c_save = c;
126 int a_rs_save = a_rs;
127 int a_cs_save = a_cs;
128 int b_rs_save = b_rs;
129 int b_cs_save = b_cs;
130 int c_rs_save = c_rs;
131 int c_cs_save = c_cs;
132 double one = bl1_d1();
133 double* b_copy;
134 int dim_a;
135 int b_copy_rs, b_copy_cs;
136
137 // Return early if possible.
138 if ( bl1_zero_dim2( m, n ) ) return;
139
140 // If necessary, allocate, initialize, and use a temporary contiguous
141 // copy of each matrix rather than the original matrices.
144 dim_a,
145 dim_a,
147 &a, &a_rs, &a_cs );
148
150 n,
152 &b, &b_rs, &b_cs );
153
155 n,
157 &c, &c_rs, &c_cs );
158
159 // Create a copy of B to use in the computation so the original matrix is
160 // left untouched.
161 b_copy = bl1_dallocm( m, n );
162
163 // Match the strides of B_copy to that of B.
164 if ( bl1_is_col_storage( b_rs, b_cs ) )
165 {
166 b_copy_rs = 1;
167 b_copy_cs = m;
168 }
169 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
170 {
171 b_copy_rs = n;
172 b_copy_cs = 1;
173 }
174
175 // Copy the contents of B to B_copy.
177 m,
178 n,
179 b, b_rs, b_cs,
181
182 // Perform the operation on B_copy.
184 uplo,
185 trans,
186 diag,
187 m,
188 n,
189 alpha,
190 a, a_rs, a_cs,
192
193 // Scale C by beta.
195 m,
196 n,
197 beta,
198 c, c_rs, c_cs );
199
200 // Add B_copy into C.
202 m,
203 n,
204 &one,
206 c, c_rs, c_cs );
207
208 // Free the copy of B.
209 bl1_dfree( b_copy );
210
211 // Free any temporary contiguous matrices, copying the result back to
212 // the original matrix.
214 &a, &a_rs, &a_cs );
215
217 &b, &b_rs, &b_cs );
218
220 n_save,
222 &c, &c_rs, &c_cs );
223}
void bl1_dtrmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_trmm.c:116

References bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dtrmm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

◆ bl1_dtrsm()

void bl1_dtrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs 
)
117{
118 int m_save = m;
119 int n_save = n;
120 double* a_save = a;
121 double* b_save = b;
122 int a_rs_save = a_rs;
123 int a_cs_save = a_cs;
124 int b_rs_save = b_rs;
125 int b_cs_save = b_cs;
126 int dim_a;
127 int lda, inca;
128 int ldb, incb;
129
130 // Return early if possible.
131 if ( bl1_zero_dim2( m, n ) ) return;
132
133 // If necessary, allocate, initialize, and use a temporary contiguous
134 // copy of each matrix rather than the original matrices.
137 dim_a,
138 dim_a,
140 &a, &a_rs, &a_cs );
141
143 n,
145 &b, &b_rs, &b_cs );
146
147 // Initialize with values assuming column-major storage.
148 lda = a_cs;
149 inca = a_rs;
150 ldb = b_cs;
151 incb = b_rs;
152
153 // Adjust the parameters based on the storage of each matrix.
154 if ( bl1_is_col_storage( b_rs, b_cs ) )
155 {
156 if ( bl1_is_col_storage( a_rs, a_cs ) )
157 {
158 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
159 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
160 }
161 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
162 {
163 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
164 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
166
167 bl1_toggle_uplo( uplo );
169 }
170 }
171 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
172 {
173 if ( bl1_is_col_storage( a_rs, a_cs ) )
174 {
175 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
176 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
178
179 bl1_swap_ints( m, n );
180
183 }
184 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
185 {
186 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
187 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
190
191 bl1_swap_ints( m, n );
192
193 bl1_toggle_uplo( uplo );
195 }
196 }
197
199 uplo,
200 trans,
201 diag,
202 m,
203 n,
204 alpha,
205 a, lda,
206 b, ldb );
207
208 // Free any temporary contiguous matrices, copying the result back to
209 // the original matrix.
211 &a, &a_rs, &a_cs );
212
214 n_save,
216 &b, &b_rs, &b_cs );
217}
void bl1_dtrsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
Definition bl1_trsm.c:567

References bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dtrsm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), and bl1_zero_dim2().

Referenced by bl1_dtrsmsx(), FLA_LU_nopiv_opd_var1(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_piv_opd_var3(), and FLA_Trsm_external().

◆ bl1_dtrsm_blas()

void bl1_dtrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double * alpha,
double * a,
int  lda,
double * b,
int  ldb 
)
568{
569#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
575
580
586 m,
587 n,
588 *alpha,
589 a, lda,
590 b, ldb );
591#else
592 char blas_side;
593 char blas_uplo;
594 char blas_trans;
595 char blas_diag;
596
601
603 &blas_uplo,
604 &blas_trans,
605 &blas_diag,
606 &m,
607 &n,
608 alpha,
609 a, &lda,
610 b, &ldb );
611#endif
612}
void F77_dtrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb)
void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dtrsm(), CblasColMajor, and F77_dtrsm().

Referenced by bl1_dtrsm().

◆ bl1_dtrsmsx()

void bl1_dtrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double * alpha,
double * a,
int  a_rs,
int  a_cs,
double * b,
int  b_rs,
int  b_cs,
double * beta,
double * c,
int  c_rs,
int  c_cs 
)
120{
121 int m_save = m;
122 int n_save = n;
123 double* a_save = a;
124 double* b_save = b;
125 double* c_save = c;
126 int a_rs_save = a_rs;
127 int a_cs_save = a_cs;
128 int b_rs_save = b_rs;
129 int b_cs_save = b_cs;
130 int c_rs_save = c_rs;
131 int c_cs_save = c_cs;
132 double one = bl1_d1();
133 double* b_copy;
134 int dim_a;
135 int b_copy_rs, b_copy_cs;
136
137 // Return early if possible.
138 if ( bl1_zero_dim2( m, n ) ) return;
139
140 // If necessary, allocate, initialize, and use a temporary contiguous
141 // copy of each matrix rather than the original matrices.
144 dim_a,
145 dim_a,
147 &a, &a_rs, &a_cs );
148
150 n,
152 &b, &b_rs, &b_cs );
153
155 n,
157 &c, &c_rs, &c_cs );
158
159 // Create a copy of B to use in the computation so the original matrix is
160 // left untouched.
161 b_copy = bl1_dallocm( m, n );
162
163 // Match the strides of B_copy to that of B.
164 if ( bl1_is_col_storage( b_rs, b_cs ) )
165 {
166 b_copy_rs = 1;
167 b_copy_cs = m;
168 }
169 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
170 {
171 b_copy_rs = n;
172 b_copy_cs = 1;
173 }
174
175 // Copy the contents of B to B_copy.
177 m,
178 n,
179 b, b_rs, b_cs,
181
182 // Perform the operation on B_copy.
184 uplo,
185 trans,
186 diag,
187 m,
188 n,
189 alpha,
190 a, a_rs, a_cs,
192
193 // Scale C by beta.
195 m,
196 n,
197 beta,
198 c, c_rs, c_cs );
199
200 // Add B_copy into C.
202 m,
203 n,
204 &one,
206 c, c_rs, c_cs );
207
208 // Free the copy of B.
209 bl1_dfree( b_copy );
210
211 // Free any temporary contiguous matrices, copying the result back to
212 // the original matrix.
214 &a, &a_rs, &a_cs );
215
217 &b, &b_rs, &b_cs );
218
220 n_save,
222 &c, &c_rs, &c_cs );
223}
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_trsm.c:116

References bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dtrsm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

◆ bl1_sgemm()

void bl1_sgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 int m_save = m;
16 int n_save = n;
17 float* a_save = a;
18 float* b_save = b;
19 float* c_save = c;
20 int a_rs_save = a_rs;
21 int a_cs_save = a_cs;
22 int b_rs_save = b_rs;
23 int b_cs_save = b_cs;
24 int c_rs_save = c_rs;
25 int c_cs_save = c_cs;
26 float zero = bl1_s0();
27 float one = bl1_s1();
28 float* a_unswap;
29 float* b_unswap;
30 float* c_trans;
31 int lda, inca;
32 int ldb, incb;
33 int ldc, incc;
35 int m_gemm, n_gemm;
37
38 // Return early if possible.
39 if ( bl1_zero_dim3( m, k, n ) )
40 {
42 m,
43 n,
44 beta,
45 c, c_rs, c_cs );
46 return;
47 }
48
49 // If necessary, allocate, initialize, and use a temporary contiguous
50 // copy of each matrix rather than the original matrices.
52 m,
53 k,
55 &a, &a_rs, &a_cs );
56
58 k,
59 n,
61 &b, &b_rs, &b_cs );
62
64 n,
66 &c, &c_rs, &c_cs );
67
68 // These are used to track the original values of a and b prior to any
69 // operand swapping that might take place. This is necessary for proper
70 // freeing of memory when one is a temporary contiguous matrix.
71 a_unswap = a;
72 b_unswap = b;
73
74 // These are used to track the dimensions of the product of the
75 // A and B operands to the BLAS invocation of gemm. These differ
76 // from m and n when the operands need to be swapped.
77 m_gemm = m;
78 n_gemm = n;
79
80 // Initialize with values assuming column-major storage.
81 lda = a_cs;
82 inca = a_rs;
83 ldb = b_cs;
84 incb = b_rs;
85 ldc = c_cs;
86 incc = c_rs;
87
88 // Adjust the parameters based on the storage of each matrix.
90 {
92 {
94 {
95 // requested operation: C_c += tr( A_c ) * tr( B_c )
96 // effective operation: C_c += tr( A_c ) * tr( B_c )
97 }
98 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
99 {
100
101 // requested operation: C_c += tr( A_c ) * tr( B_r )
102 // effective operation: C_c += tr( A_c ) * tr( B_c )^T
104
106 }
107 }
108 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
109 {
110 if ( bl1_is_col_storage( b_rs, b_cs ) )
111 {
112 // requested operation: C_c += tr( A_r ) * tr( B_c )
113 // effective operation: C_c += tr( A_r )^T * tr( B_c )
115
117 }
118 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
119 {
120 // requested operation: C_c += tr( A_r ) * tr( B_r )
121 // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
124
129
132 }
133 }
134 }
135 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
136 {
137 if ( bl1_is_col_storage( a_rs, a_cs ) )
138 {
139 if ( bl1_is_col_storage( b_rs, b_cs ) )
140 {
141 // requested operation: C_r += tr( A_c ) * tr( B_c )
142 // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
144
145 bl1_swap_ints( m, n );
146
148 }
149 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
150 {
151 // requested operation: C_r += tr( A_c ) * tr( B_r )
152 // effective operation: C_c += tr( B_c ) * tr( A_c )^T
155
157
158 bl1_swap_ints( m, n );
164 }
165 }
166 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
167 {
168 if ( bl1_is_col_storage( b_rs, b_cs ) )
169 {
170 // requested operation: C_r += tr( A_r ) * tr( B_c )
171 // effective operation: C_c += tr( B_c )^T * tr( A_c )
174
176
177 bl1_swap_ints( m, n );
183 }
184 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
185 {
186 // requested operation: C_r += tr( A_r ) * tr( B_r )
187 // effective operation: C_c += tr( B_c ) * tr( A_c )
191
192 bl1_swap_ints( m, n );
198 }
199 }
200 }
201
202 // There are two cases where we need to perform the gemm and then axpy
203 // the result into C with a transposition. We handle those cases here.
204 if ( gemm_needs_axpyt )
205 {
206 // We need a temporary matrix for holding C^T. Notice that m and n
207 // represent the dimensions of C, while m_gemm and n_gemm are the
208 // dimensions of the actual product op(A)*op(B), which may be n-by-m
209 // since the operands may have been swapped.
212 incc_trans = 1;
213
214 // Compute tr( A ) * tr( B ), where A and B may have been swapped
215 // to reference the other, and store the result in C_trans.
217 transb,
218 m_gemm,
219 n_gemm,
220 k,
221 alpha,
222 a, lda,
223 b, ldb,
224 &zero,
226
227 // Scale C by beta.
229 m,
230 n,
231 beta,
232 c, incc, ldc );
233
234 // And finally, accumulate the matrix product in C_trans into C
235 // with a transpose.
237 m,
238 n,
239 &one,
241 c, incc, ldc );
242
243 // Free the temporary matrix for C.
245 }
246 else // no extra axpyt step needed
247 {
249 transb,
250 m_gemm,
251 n_gemm,
252 k,
253 alpha,
254 a, lda,
255 b, ldb,
256 beta,
257 c, ldc );
258 }
259
260 // Free any temporary contiguous matrices, copying the result back to
261 // the original matrix.
263 &a_unswap, &a_rs, &a_cs );
264
266 &b_unswap, &b_rs, &b_cs );
267
269 n_save,
271 &c, &c_rs, &c_cs );
272}
void bl1_saxpymt(trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:13
void bl1_sgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition bl1_gemm.c:1213
void bl1_sscalm(conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
Definition bl1_scalm.c:13
void bl1_sfree_contigm(float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:13
void bl1_sfree(float *p)
Definition bl1_free.c:30
float * bl1_sallocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:30
void bl1_sfree_saved_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:13
float bl1_s1(void)
Definition bl1_constants.c:47
void bl1_screate_contigm(int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:13
void bl1_screate_contigmt(trans1_t trans_dims, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmt.c:13

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sgemm_blas(), bl1_sscalm(), bl1_zero_dim3(), BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

◆ bl1_sgemm_blas()

void bl1_sgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
float * alpha,
float * a,
int  lda,
float * b,
int  ldb,
float * beta,
float * c,
int  ldc 
)
1214{
1215#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1219
1222
1226 m,
1227 n,
1228 k,
1229 *alpha,
1230 a, lda,
1231 b, ldb,
1232 *beta,
1233 c, ldc );
1234#else
1235 char blas_transa;
1236 char blas_transb;
1237
1240
1242 &blas_transb,
1243 &m,
1244 &n,
1245 &k,
1246 alpha,
1247 a, &lda,
1248 b, &ldb,
1249 beta,
1250 c, &ldc );
1251#endif
1252}
void F77_sgemm(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)
void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)

References bl1_param_map_to_netlib_trans(), cblas_sgemm(), CblasColMajor, and F77_sgemm().

Referenced by bl1_sgemm().

◆ bl1_shemm()

void bl1_shemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
16 uplo,
17 m,
18 n,
19 alpha,
20 a, a_rs, a_cs,
21 b, b_rs, b_cs,
22 beta,
23 c, c_rs, c_cs );
24}
void bl1_ssymm(side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
Definition bl1_symm.c:13

References bl1_ssymm().

◆ bl1_sher2k()

void bl1_sher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 bl1_ssyr2k( uplo,
16 trans,
17 m,
18 k,
19 alpha,
20 a, a_rs, a_cs,
21 b, b_rs, b_cs,
22 beta,
23 c, c_rs, c_cs );
24}
void bl1_ssyr2k(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
Definition bl1_syr2k.c:13

References bl1_ssyr2k().

◆ bl1_sherk()

void bl1_sherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 bl1_ssyrk( uplo,
16 trans,
17 m,
18 k,
19 alpha,
20 a, a_rs, a_cs,
21 beta,
22 c, c_rs, c_cs );
23}
void bl1_ssyrk(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
Definition bl1_syrk.c:13

References bl1_ssyrk().

◆ bl1_ssymm()

void bl1_ssymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 int m_save = m;
16 int n_save = n;
17 float* a_save = a;
18 float* b_save = b;
19 float* c_save = c;
20 int a_rs_save = a_rs;
21 int a_cs_save = a_cs;
22 int b_rs_save = b_rs;
23 int b_cs_save = b_cs;
24 int c_rs_save = c_rs;
25 int c_cs_save = c_cs;
26 float zero = bl1_s0();
27 float one = bl1_s1();
28 float* b_copy;
29 float* c_trans;
30 int dim_a;
31 int lda, inca;
32 int ldb, incb;
33 int ldc, incc;
39
40 // Return early if possible.
41 if ( bl1_zero_dim2( m, n ) ) return;
42
43 // If necessary, allocate, initialize, and use a temporary contiguous
44 // copy of each matrix rather than the original matrices.
47 dim_a,
48 dim_a,
50 &a, &a_rs, &a_cs );
51
53 n,
55 &b, &b_rs, &b_cs );
56
58 n,
60 &c, &c_rs, &c_cs );
61
62 // Initialize with values assuming column-major storage.
63 lda = a_cs;
64 inca = a_rs;
65 ldb = b_cs;
66 incb = b_rs;
67 ldc = c_cs;
68 incc = c_rs;
69
70 // Adjust the parameters based on the storage of each matrix.
72 {
74 {
76 {
77 // requested operation: C_c += uplo( A_c ) * B_c
78 // effective operation: C_c += uplo( A_c ) * B_c
79 }
80 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
81 {
82 // requested operation: C_c += uplo( A_c ) * B_r
83 // effective operation: C_c += uplo( A_c ) * B_c
85 }
86 }
87 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
88 {
90 {
91 // requested operation: C_c += uplo( A_r ) * B_c
92 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
94
95 bl1_toggle_uplo( uplo );
96 }
97 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
98 {
99 // requested operation: C_c += uplo( A_r ) * B_r
100 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
103
105 bl1_toggle_uplo( uplo );
106
108 }
109 }
110 }
111 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
112 {
113 if ( bl1_is_col_storage( a_rs, a_cs ) )
114 {
115 if ( bl1_is_col_storage( b_rs, b_cs ) )
116 {
117 // requested operation: C_r += uplo( A_c ) * B_c
118 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
120
121 bl1_swap_ints( m, n );
122
124 }
125 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
126 {
127 // requested operation: C_r += uplo( A_c ) * B_r
128 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
131
132 bl1_swap_ints( m, n );
133
135 }
136 }
137 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
138 {
139 if ( bl1_is_col_storage( b_rs, b_cs ) )
140 {
141 // requested operation: C_r += uplo( A_r ) * B_c
142 // effective operation: C_c += B_c^T * ~uplo( A_c )
145
146 bl1_swap_ints( m, n );
147
149 bl1_toggle_uplo( uplo );
150
153 }
154 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
155 {
156 // requested operation: C_r += uplo( A_r ) * B_r
157 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
161
162 bl1_swap_ints( m, n );
163
164 bl1_toggle_uplo( uplo );
166 }
167 }
168 }
169
170 // We need a temporary matrix for the cases where B needs to be copied.
171 b_copy = b;
172 ldb_copy = ldb;
173 incb_copy = incb;
174
175 // There are two cases where we need to make a copy of B: one where the
176 // copy's dimensions are transposed from the original B, and one where
177 // the dimensions are not swapped.
178 if ( symm_needs_copyb )
179 {
181
182 // Set transb, which determines whether or not we need to copy from B
183 // as if it needs a transposition. If a transposition is needed, then
184 // m and n have already been swapped. So in either case m
185 // represents the leading dimension of the copy.
188
189 b_copy = bl1_sallocm( m, n );
190 ldb_copy = m;
191 incb_copy = 1;
192
194 m,
195 n,
196 b, incb, ldb,
198 }
199
200 // There are two cases where we need to perform the symm and then axpy
201 // the result into C with a transposition. We handle those cases here.
202 if ( symm_needs_axpyt )
203 {
204 // We need a temporary matrix for holding C^T. Notice that m and n
205 // represent the dimensions of C, and thus C_trans is n-by-m
206 // (interpreting both as column-major matrices). So the leading
207 // dimension of the temporary matrix holding C^T is n.
208 c_trans = bl1_sallocm( n, m );
209 ldc_trans = n;
210 incc_trans = 1;
211
212 // Compute A * B (or B * A) and store the result in C_trans.
213 // Note that there is no overlap between the axpyt cases and
214 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
216 uplo,
217 n,
218 m,
219 alpha,
220 a, lda,
221 b, ldb,
222 &zero,
224
225 // Scale C by beta.
227 m,
228 n,
229 beta,
230 c, incc, ldc );
231
232 // And finally, accumulate the matrix product in C_trans into C
233 // with a transpose.
235 m,
236 n,
237 &one,
239 c, incc, ldc );
240
241 // Free the temporary matrix for C.
243 }
244 else // no extra axpyt step needed
245 {
247 uplo,
248 m,
249 n,
250 alpha,
251 a, lda,
253 beta,
254 c, ldc );
255 }
256
257 if ( symm_needs_copyb )
258 bl1_sfree( b_copy );
259
260 // Free any temporary contiguous matrices, copying the result back to
261 // the original matrix.
263 &a, &a_rs, &a_cs );
264
266 &b, &b_rs, &b_cs );
267
269 n_save,
271 &c, &c_rs, &c_cs );
272}
void bl1_scopymt(trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition bl1_copymt.c:81
void bl1_ssymm_blas(side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition bl1_symm.c:1059
void bl1_screate_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:13

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_ssymm_blas(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_shemm(), FLA_Hemm_external(), and FLA_Symm_external().

◆ bl1_ssymm_blas()

void bl1_ssymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float * alpha,
float * a,
int  lda,
float * b,
int  ldb,
float * beta,
float * c,
int  ldc 
)
1060{
1061#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1065
1068
1070 cblas_side,
1071 cblas_uplo,
1072 m,
1073 n,
1074 *alpha,
1075 a, lda,
1076 b, ldb,
1077 *beta,
1078 c, ldc );
1079#else
1080 char blas_side;
1081 char blas_uplo;
1082
1085
1087 &blas_uplo,
1088 &m,
1089 &n,
1090 alpha,
1091 a, &lda,
1092 b, &ldb,
1093 beta,
1094 c, &ldc );
1095#endif
1096}
void F77_ssymm(char *side, char *uplo, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)
void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor, and F77_ssymm().

Referenced by bl1_ssymm().

◆ bl1_ssyr2k()

void bl1_ssyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 uplo1_t uplo_save = uplo;
16 int m_save = m;
17 float* a_save = a;
18 float* b_save = b;
19 float* c_save = c;
20 int a_rs_save = a_rs;
21 int a_cs_save = a_cs;
22 int b_rs_save = b_rs;
23 int b_cs_save = b_cs;
24 int c_rs_save = c_rs;
25 int c_cs_save = c_cs;
26 float* a_copy;
27 float* b_copy;
28 int lda, inca;
29 int ldb, incb;
30 int ldc, incc;
35
36 // Return early if possible.
37 if ( bl1_zero_dim2( m, k ) ) return;
38
39 // If necessary, allocate, initialize, and use a temporary contiguous
40 // copy of each matrix rather than the original matrices.
42 m,
43 k,
45 &a, &a_rs, &a_cs );
46
48 m,
49 k,
51 &b, &b_rs, &b_cs );
52
54 m,
55 m,
57 &c, &c_rs, &c_cs );
58
59 // Initialize with values assuming column-major storage.
60 lda = a_cs;
61 inca = a_rs;
62 ldb = b_cs;
63 incb = b_rs;
64 ldc = c_cs;
65 incc = c_rs;
66
67 // Adjust the parameters based on the storage of each matrix.
69 {
71 {
73 {
74 // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
75 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
76 }
77 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
78 {
79 // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
80 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
82 }
83 }
84 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
85 {
87 {
88 // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
89 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
91 }
92 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
93 {
94 // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
95 // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
98
100 }
101 }
102 }
103 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
104 {
105 if ( bl1_is_col_storage( a_rs, a_cs ) )
106 {
107 if ( bl1_is_col_storage( b_rs, b_cs ) )
108 {
109 // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
110 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
112
113 bl1_toggle_uplo( uplo );
114 }
115 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
116 {
117 // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
118 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
120
122
123 bl1_toggle_uplo( uplo );
124 }
125 }
126 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
127 {
128 if ( bl1_is_col_storage( b_rs, b_cs ) )
129 {
130 // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
131 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
133
135
136 bl1_toggle_uplo( uplo );
137 }
138 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
139 {
140 // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
141 // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
145
146 bl1_toggle_uplo( uplo );
148 }
149 }
150 }
151
152 a_copy = a;
153 lda_copy = lda;
154 inca_copy = inca;
155
156 // There are two cases where we need to copy A to column-major storage.
157 // We handle those two cases here.
158 if ( syr2k_needs_copya )
159 {
160 int m_a;
161 int n_a;
162
163 // Determine the dimensions of A according to the value of trans. We
164 // need this in order to set the leading dimension of the copy of A.
166
167 // We need a temporary matrix to hold a column-major copy of A.
168 a_copy = bl1_sallocm( m, k );
169 lda_copy = m_a;
170 inca_copy = 1;
171
172 // Copy the contents of A into A_copy.
174 m_a,
175 n_a,
176 a, inca, lda,
178 }
179
180 b_copy = b;
181 ldb_copy = ldb;
182 incb_copy = incb;
183
184 // There are two cases where we need to copy B to column-major storage.
185 // We handle those two cases here.
186 if ( syr2k_needs_copyb )
187 {
188 int m_b;
189 int n_b;
190
191 // Determine the dimensions of B according to the value of trans. We
192 // need this in order to set the leading dimension of the copy of B.
194
195 // We need a temporary matrix to hold a column-major copy of B.
196 b_copy = bl1_sallocm( m, k );
197 ldb_copy = m_b;
198 incb_copy = 1;
199
200 // Copy the contents of B into B_copy.
202 m_b,
203 n_b,
204 b, incb, ldb,
206 }
207
208 bl1_ssyr2k_blas( uplo,
209 trans,
210 m,
211 k,
212 alpha,
215 beta,
216 c, ldc );
217
218 if ( syr2k_needs_copya )
219 bl1_sfree( a_copy );
220
221 if ( syr2k_needs_copyb )
222 bl1_sfree( b_copy );
223
224 // Free any temporary contiguous matrices, copying the result back to
225 // the original matrix.
227 &a, &a_rs, &a_cs );
228
230 &b, &b_rs, &b_cs );
231
233 m_save,
234 m_save,
236 &c, &c_rs, &c_cs );
237}
void bl1_ssyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
Definition bl1_syr2k.c:919
void bl1_sfree_saved_contigmr(uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigmr.c:13

References bl1_is_col_storage(), bl1_sallocm(), bl1_scopymt(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_set_dims_with_trans(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigmr(), bl1_ssyr2k_blas(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().

◆ bl1_ssyr2k_blas()

void bl1_ssyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
float * a,
int  lda,
float * b,
int  ldb,
float * beta,
float * c,
int  ldc 
)
920{
921#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
925
926 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
927 // so we have to map it down to regular transposition.
929
932
936 m,
937 k,
938 *alpha,
939 a, lda,
940 b, ldb,
941 *beta,
942 c, ldc );
943#else
944 char blas_uplo;
945 char blas_trans;
946
947 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
948 // so we have to map it down to regular transposition.
950
953
955 &blas_trans,
956 &m,
957 &k,
958 alpha,
959 a, &lda,
960 b, &ldb,
961 beta,
962 c, &ldc );
963#endif
964}
void F77_ssyr2k(char *uplo, char *transa, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc)
void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_ssyr2k(), CblasColMajor, and F77_ssyr2k().

Referenced by bl1_ssyr2k().

◆ bl1_ssyrk()

void bl1_ssyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 uplo1_t uplo_save = uplo;
16 int m_save = m;
17 float* a_save = a;
18 float* c_save = c;
19 int a_rs_save = a_rs;
20 int a_cs_save = a_cs;
21 int c_rs_save = c_rs;
22 int c_cs_save = c_cs;
23 int lda, inca;
24 int ldc, incc;
25
26 // Return early if possible.
27 if ( bl1_zero_dim2( m, k ) ) return;
28
29 // If necessary, allocate, initialize, and use a temporary contiguous
30 // copy of each matrix rather than the original matrices.
32 m,
33 k,
35 &a, &a_rs, &a_cs );
36
38 m,
39 m,
41 &c, &c_rs, &c_cs );
42
43 // Initialize with values assuming column-major storage.
44 lda = a_cs;
45 inca = a_rs;
46 ldc = c_cs;
47 incc = c_rs;
48
49 // Adjust the parameters based on the storage of each matrix.
51 {
53 {
54 // requested operation: uplo( C_c ) += A_c * A_c^T
55 // effective operation: uplo( C_c ) += A_c * A_c^T
56 }
57 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
58 {
59 // requested operation: uplo( C_c ) += A_r * A_r^T
60 // effective operation: uplo( C_c ) += A_c^T * A_c
62
64 }
65 }
66 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
67 {
69 {
70 // requested operation: uplo( C_r ) += A_c * A_c^T
71 // effective operation: ~uplo( C_c ) += A_c * A_c^T
73
74 bl1_toggle_uplo( uplo );
75 }
76 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
77 {
78 // requested operation: uplo( C_r ) += A_r * A_r^T
79 // effective operation: ~uplo( C_c ) += A_c^T * A_c
82
83 bl1_toggle_uplo( uplo );
85 }
86 }
87
88 bl1_ssyrk_blas( uplo,
89 trans,
90 m,
91 k,
92 alpha,
93 a, lda,
94 beta,
95 c, ldc );
96
97 // Free any temporary contiguous matrices, copying the result back to
98 // the original matrix.
100 &a, &a_rs, &a_cs );
101
103 m_save,
104 m_save,
106 &c, &c_rs, &c_cs );
107}
void bl1_ssyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *beta, float *c, int ldc)
Definition bl1_syrk.c:399

References bl1_is_col_storage(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_sfree_contigm(), bl1_sfree_saved_contigmr(), bl1_ssyrk_blas(), and bl1_zero_dim2().

Referenced by bl1_sherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_ops_var1().

◆ bl1_ssyrk_blas()

void bl1_ssyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float * alpha,
float * a,
int  lda,
float * beta,
float * c,
int  ldc 
)
400{
401#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
405
408
412 m,
413 k,
414 *alpha,
415 a, lda,
416 *beta,
417 c, ldc );
418#else
419 char blas_uplo;
420 char blas_trans;
421
424
426 &blas_trans,
427 &m,
428 &k,
429 alpha,
430 a, &lda,
431 beta,
432 c, &ldc );
433#endif
434}
void F77_ssyrk(char *uplo, char *transa, int *n, int *k, float *alpha, float *a, int *lda, float *beta, float *c, int *ldc)
void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ssyrk(), CblasColMajor, and F77_ssyrk().

Referenced by bl1_ssyrk().

◆ bl1_strmm()

void bl1_strmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 int m_save = m;
16 int n_save = n;
17 float* a_save = a;
18 float* b_save = b;
19 int a_rs_save = a_rs;
20 int a_cs_save = a_cs;
21 int b_rs_save = b_rs;
22 int b_cs_save = b_cs;
23 int dim_a;
24 int lda, inca;
25 int ldb, incb;
26
27 // Return early if possible.
28 if ( bl1_zero_dim2( m, n ) ) return;
29
30 // If necessary, allocate, initialize, and use a temporary contiguous
31 // copy of each matrix rather than the original matrices.
34 dim_a,
35 dim_a,
37 &a, &a_rs, &a_cs );
38
40 n,
42 &b, &b_rs, &b_cs );
43
44 // Initialize with values assuming column-major storage.
45 lda = a_cs;
46 inca = a_rs;
47 ldb = b_cs;
48 incb = b_rs;
49
50 // Adjust the parameters based on the storage of each matrix.
52 {
54 {
55 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
56 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
57 }
58 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
59 {
60 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
61 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
63
64 bl1_toggle_uplo( uplo );
66 }
67 }
68 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
69 {
71 {
72 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
73 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
75
76 bl1_swap_ints( m, n );
77
80 }
81 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
82 {
83 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
84 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
87
88 bl1_swap_ints( m, n );
89
90 bl1_toggle_uplo( uplo );
92 }
93 }
94
96 uplo,
97 trans,
98 diag,
99 m,
100 n,
101 alpha,
102 a, lda,
103 b, ldb );
104
105 // Free any temporary contiguous matrices, copying the result back to
106 // the original matrix.
108 &a, &a_rs, &a_cs );
109
111 n_save,
113 &b, &b_rs, &b_cs );
114}
void bl1_strmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
Definition bl1_trmm.c:520

References bl1_is_col_storage(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_strmm_blas(), and bl1_zero_dim2().

Referenced by bl1_strmmsx(), and FLA_Trmm_external().

◆ bl1_strmm_blas()

void bl1_strmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float * alpha,
float * a,
int  lda,
float * b,
int  ldb 
)
521{
522#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
528
533
539 m,
540 n,
541 *alpha,
542 a, lda,
543 b, ldb );
544#else
545 char blas_side;
546 char blas_uplo;
547 char blas_trans;
548 char blas_diag;
549
554
556 &blas_uplo,
557 &blas_trans,
558 &blas_diag,
559 &m,
560 &n,
561 alpha,
562 a, &lda,
563 b, &ldb );
564#endif
565}
void F77_strmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb)
void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_strmm(), CblasColMajor, and F77_strmm().

Referenced by bl1_strmm().

◆ bl1_strmmsx()

void bl1_strmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 int m_save = m;
16 int n_save = n;
17 float* a_save = a;
18 float* b_save = b;
19 float* c_save = c;
20 int a_rs_save = a_rs;
21 int a_cs_save = a_cs;
22 int b_rs_save = b_rs;
23 int b_cs_save = b_cs;
24 int c_rs_save = c_rs;
25 int c_cs_save = c_cs;
26 float one = bl1_s1();
27 float* b_copy;
28 int dim_a;
30
31 // Return early if possible.
32 if ( bl1_zero_dim2( m, n ) ) return;
33
34 // If necessary, allocate, initialize, and use a temporary contiguous
35 // copy of each matrix rather than the original matrices.
38 dim_a,
39 dim_a,
41 &a, &a_rs, &a_cs );
42
44 n,
46 &b, &b_rs, &b_cs );
47
49 n,
51 &c, &c_rs, &c_cs );
52
53 // Create a copy of B to use in the computation so the original matrix is
54 // left untouched.
55 b_copy = bl1_sallocm( m, n );
56
57 // Match the strides of B_copy to that of B.
59 {
60 b_copy_rs = 1;
61 b_copy_cs = m;
62 }
63 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
64 {
65 b_copy_rs = n;
66 b_copy_cs = 1;
67 }
68
69 // Copy the contents of B to B_copy.
71 m,
72 n,
73 b, b_rs, b_cs,
75
76 // Perform the operation on B_copy.
78 uplo,
79 trans,
80 diag,
81 m,
82 n,
83 alpha,
84 a, a_rs, a_cs,
86
87 // Scale C by beta.
89 m,
90 n,
91 beta,
92 c, c_rs, c_cs );
93
94 // Add B_copy into C.
96 m,
97 n,
98 &one,
100 c, c_rs, c_cs );
101
102 // Free the copy of B.
103 bl1_sfree( b_copy );
104
105 // Free any temporary contiguous matrices, copying the result back to
106 // the original matrix.
108 &a, &a_rs, &a_cs );
109
111 &b, &b_rs, &b_cs );
112
114 n_save,
116 &c, &c_rs, &c_cs );
117}
void bl1_strmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition bl1_trmm.c:13

References bl1_is_col_storage(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_strmm(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

◆ bl1_strsm()

void bl1_strsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs 
)
14{
15 int m_save = m;
16 int n_save = n;
17 float* a_save = a;
18 float* b_save = b;
19 int a_rs_save = a_rs;
20 int a_cs_save = a_cs;
21 int b_rs_save = b_rs;
22 int b_cs_save = b_cs;
23 int dim_a;
24 int lda, inca;
25 int ldb, incb;
26
27 // Return early if possible.
28 if ( bl1_zero_dim2( m, n ) ) return;
29
30 // If necessary, allocate, initialize, and use a temporary contiguous
31 // copy of each matrix rather than the original matrices.
34 dim_a,
35 dim_a,
37 &a, &a_rs, &a_cs );
38
40 n,
42 &b, &b_rs, &b_cs );
43
44 // Initialize with values assuming column-major storage.
45 lda = a_cs;
46 inca = a_rs;
47 ldb = b_cs;
48 incb = b_rs;
49
50 // Adjust the parameters based on the storage of each matrix.
52 {
54 {
55 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
56 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
57 }
58 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
59 {
60 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
61 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
63
64 bl1_toggle_uplo( uplo );
66 }
67 }
68 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
69 {
71 {
72 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
73 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
75
76 bl1_swap_ints( m, n );
77
80 }
81 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
82 {
83 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
84 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
87
88 bl1_swap_ints( m, n );
89
90 bl1_toggle_uplo( uplo );
92 }
93 }
94
96 uplo,
97 trans,
98 diag,
99 m,
100 n,
101 alpha,
102 a, lda,
103 b, ldb );
104
105 // Free any temporary contiguous matrices, copying the result back to
106 // the original matrix.
108 &a, &a_rs, &a_cs );
109
111 n_save,
113 &b, &b_rs, &b_cs );
114}
void bl1_strsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
Definition bl1_trsm.c:520

References bl1_is_col_storage(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_strsm_blas(), and bl1_zero_dim2().

Referenced by bl1_strsmsx(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_piv_ops_var3(), and FLA_Trsm_external().

◆ bl1_strsm_blas()

void bl1_strsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float * alpha,
float * a,
int  lda,
float * b,
int  ldb 
)
521{
522#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
528
533
539 m,
540 n,
541 *alpha,
542 a, lda,
543 b, ldb );
544#else
545 char blas_side;
546 char blas_uplo;
547 char blas_trans;
548 char blas_diag;
549
554
556 &blas_uplo,
557 &blas_trans,
558 &blas_diag,
559 &m,
560 &n,
561 alpha,
562 a, &lda,
563 b, &ldb );
564#endif
565}
void F77_strsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb)
void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_strsm(), CblasColMajor, and F77_strsm().

Referenced by bl1_strsm().

◆ bl1_strsmsx()

void bl1_strsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float * alpha,
float * a,
int  a_rs,
int  a_cs,
float * b,
int  b_rs,
int  b_cs,
float * beta,
float * c,
int  c_rs,
int  c_cs 
)
14{
15 int m_save = m;
16 int n_save = n;
17 float* a_save = a;
18 float* b_save = b;
19 float* c_save = c;
20 int a_rs_save = a_rs;
21 int a_cs_save = a_cs;
22 int b_rs_save = b_rs;
23 int b_cs_save = b_cs;
24 int c_rs_save = c_rs;
25 int c_cs_save = c_cs;
26 float one = bl1_s1();
27 float* b_copy;
28 int dim_a;
30
31 // Return early if possible.
32 if ( bl1_zero_dim2( m, n ) ) return;
33
34 // If necessary, allocate, initialize, and use a temporary contiguous
35 // copy of each matrix rather than the original matrices.
38 dim_a,
39 dim_a,
41 &a, &a_rs, &a_cs );
42
44 n,
46 &b, &b_rs, &b_cs );
47
49 n,
51 &c, &c_rs, &c_cs );
52
53 // Create a copy of B to use in the computation so the original matrix is
54 // left untouched.
55 b_copy = bl1_sallocm( m, n );
56
57 // Match the strides of B_copy to that of B.
59 {
60 b_copy_rs = 1;
61 b_copy_cs = m;
62 }
63 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
64 {
65 b_copy_rs = n;
66 b_copy_cs = 1;
67 }
68
69 // Copy the contents of B to B_copy.
71 m,
72 n,
73 b, b_rs, b_cs,
75
76 // Perform the operation on B_copy.
78 uplo,
79 trans,
80 diag,
81 m,
82 n,
83 alpha,
84 a, a_rs, a_cs,
86
87 // Scale C by beta.
89 m,
90 n,
91 beta,
92 c, c_rs, c_cs );
93
94 // Add B_copy into C.
96 m,
97 n,
98 &one,
100 c, c_rs, c_cs );
101
102 // Free the copy of B.
103 bl1_sfree( b_copy );
104
105 // Free any temporary contiguous matrices, copying the result back to
106 // the original matrix.
108 &a, &a_rs, &a_cs );
109
111 &b, &b_rs, &b_cs );
112
114 n_save,
116 &c, &c_rs, &c_cs );
117}
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition bl1_trsm.c:13

References bl1_is_col_storage(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_strsm(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

◆ bl1_zgemm()

void bl1_zgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs,
dcomplex * beta,
dcomplex * c,
int  c_rs,
int  c_cs 
)
874{
875 int m_save = m;
876 int n_save = n;
877 dcomplex* a_save = a;
878 dcomplex* b_save = b;
879 dcomplex* c_save = c;
880 int a_rs_save = a_rs;
881 int a_cs_save = a_cs;
882 int b_rs_save = b_rs;
883 int b_cs_save = b_cs;
884 int c_rs_save = c_rs;
885 int c_cs_save = c_cs;
886 dcomplex zero = bl1_z0();
887 dcomplex one = bl1_z1();
893 int lda, inca;
894 int ldb, incb;
895 int ldc, incc;
896 int lda_conj, inca_conj;
897 int ldb_conj, incb_conj;
899 int m_gemm, n_gemm;
901 int a_was_copied;
902 int b_was_copied;
903
904 // Return early if possible.
905 if ( bl1_zero_dim3( m, k, n ) )
906 {
908 m,
909 n,
910 beta,
911 c, c_rs, c_cs );
912 return;
913 }
914
915 // If necessary, allocate, initialize, and use a temporary contiguous
916 // copy of each matrix rather than the original matrices.
918 m,
919 k,
921 &a, &a_rs, &a_cs );
922
924 k,
925 n,
927 &b, &b_rs, &b_cs );
928
930 n,
932 &c, &c_rs, &c_cs );
933
934 // Figure out whether A and/or B was copied to contiguous memory. This
935 // is used later to prevent redundant copying.
936 a_was_copied = ( a != a_save );
937 b_was_copied = ( b != b_save );
938
939 // These are used to track the original values of a and b prior to any
940 // operand swapping that might take place. This is necessary for proper
941 // freeing of memory when one is a temporary contiguous matrix.
942 a_unswap = a;
943 b_unswap = b;
944
945 // These are used to track the dimensions of the product of the
946 // A and B operands to the BLAS invocation of gemm. These differ
947 // from m and n when the operands need to be swapped.
948 m_gemm = m;
949 n_gemm = n;
950
951 // Initialize with values assuming column-major storage.
952 lda = a_cs;
953 inca = a_rs;
954 ldb = b_cs;
955 incb = b_rs;
956 ldc = c_cs;
957 incc = c_rs;
958
959 // Adjust the parameters based on the storage of each matrix.
960 if ( bl1_is_col_storage( c_rs, c_cs ) )
961 {
962 if ( bl1_is_col_storage( a_rs, a_cs ) )
963 {
964 if ( bl1_is_col_storage( b_rs, b_cs ) )
965 {
966 // requested operation: C_c += tr( A_c ) * tr( B_c )
967 // effective operation: C_c += tr( A_c ) * tr( B_c )
968 }
969 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
970 {
971
972 // requested operation: C_c += tr( A_c ) * tr( B_r )
973 // effective operation: C_c += tr( A_c ) * tr( B_c )^T
975
977 }
978 }
979 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
980 {
981 if ( bl1_is_col_storage( b_rs, b_cs ) )
982 {
983 // requested operation: C_c += tr( A_r ) * tr( B_c )
984 // effective operation: C_c += tr( A_r )^T * tr( B_c )
986
988 }
989 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
990 {
991 // requested operation: C_c += tr( A_r ) * tr( B_r )
992 // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
995
1001
1004 }
1005 }
1006 }
1007 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
1008 {
1009 if ( bl1_is_col_storage( a_rs, a_cs ) )
1010 {
1011 if ( bl1_is_col_storage( b_rs, b_cs ) )
1012 {
1013 // requested operation: C_r += tr( A_c ) * tr( B_c )
1014 // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
1016
1017 bl1_swap_ints( m, n );
1018
1020 }
1021 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1022 {
1023 // requested operation: C_r += tr( A_c ) * tr( B_r )
1024 // effective operation: C_c += tr( B_c ) * tr( A_c )^T
1027
1029
1030 bl1_swap_ints( m, n );
1034 bl1_swap_ints( lda, ldb );
1037 }
1038 }
1039 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
1040 {
1041 if ( bl1_is_col_storage( b_rs, b_cs ) )
1042 {
1043 // requested operation: C_r += tr( A_r ) * tr( B_c )
1044 // effective operation: C_c += tr( B_c )^T * tr( A_c )
1047
1049
1050 bl1_swap_ints( m, n );
1054 bl1_swap_ints( lda, ldb );
1057 }
1058 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1059 {
1060 // requested operation: C_r += tr( A_r ) * tr( B_r )
1061 // effective operation: C_c += tr( B_c ) * tr( A_c )
1065
1066 bl1_swap_ints( m, n );
1070 bl1_swap_ints( lda, ldb );
1073 }
1074 }
1075 }
1076
1077 // We need a temporary matrix for the case where A is conjugated.
1078 a_conj = a;
1079 lda_conj = lda;
1080 inca_conj = inca;
1081
1082 // If transa indicates conjugate-no-transpose and A was not already
1083 // copied, then copy and conjugate it to a temporary matrix. Otherwise,
1084 // if transa indicates conjugate-no-transpose and A was already copied,
1085 // just conjugate it.
1087 {
1088 a_conj = bl1_zallocm( m_gemm, k );
1089 lda_conj = m_gemm;
1090 inca_conj = 1;
1091
1093 m_gemm,
1094 k,
1095 a, inca, lda,
1097 }
1098 else if ( bl1_is_conjnotrans( transa ) && a_was_copied )
1099 {
1101 k,
1103 }
1104
1105 // We need a temporary matrix for the case where B is conjugated.
1106 b_conj = b;
1107 ldb_conj = ldb;
1108 incb_conj = incb;
1109
1110 // If transb indicates conjugate-no-transpose and B was not already
1111 // copied, then copy and conjugate it to a temporary matrix. Otherwise,
1112 // if transb indicates conjugate-no-transpose and B was already copied,
1113 // just conjugate it.
1115 {
1116 b_conj = bl1_zallocm( k, n_gemm );
1117 ldb_conj = k;
1118 incb_conj = 1;
1119
1121 k,
1122 n_gemm,
1123 b, incb, ldb,
1125 }
1126 else if ( bl1_is_conjnotrans( transb ) && b_was_copied )
1127 {
1128 bl1_zconjm( k,
1129 n_gemm,
1131 }
1132
1133 // There are two cases where we need to perform the gemm and then axpy
1134 // the result into C with a transposition. We handle those cases here.
1135 if ( gemm_needs_axpyt )
1136 {
1137 // We need a temporary matrix for holding C^T. Notice that m and n
1138 // represent the dimensions of C, while m_gemm and n_gemm are the
1139 // dimensions of the actual product op(A)*op(B), which may be n-by-m
1140 // since the operands may have been swapped.
1142 ldc_trans = m_gemm;
1143 incc_trans = 1;
1144
1145 // Compute tr( A ) * tr( B ), where A and B may have been swapped
1146 // to reference the other, and store the result in C_trans.
1148 transb,
1149 m_gemm,
1150 n_gemm,
1151 k,
1152 alpha,
1155 &zero,
1156 c_trans, ldc_trans );
1157
1158 // Scale C by beta.
1160 m,
1161 n,
1162 beta,
1163 c, incc, ldc );
1164
1165 // And finally, accumulate the matrix product in C_trans into C
1166 // with a transpose.
1168 m,
1169 n,
1170 &one,
1172 c, incc, ldc );
1173
1174 // Free the temporary matrix for C.
1175 bl1_zfree( c_trans );
1176 }
1177 else // no extra axpyt step needed
1178 {
1180 transb,
1181 m_gemm,
1182 n_gemm,
1183 k,
1184 alpha,
1187 beta,
1188 c, ldc );
1189 }
1190
1192 bl1_zfree( a_conj );
1193
1195 bl1_zfree( b_conj );
1196
1197 // Free any temporary contiguous matrices, copying the result back to
1198 // the original matrix.
1200 &a_unswap, &a_rs, &a_cs );
1201
1203 &b_unswap, &b_rs, &b_cs );
1204
1206 n_save,
1208 &c, &c_rs, &c_cs );
1209}
void bl1_zaxpymt(trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_axpymt.c:248
void bl1_zconjm(int m, int n, dcomplex *a, int a_rs, int a_cs)
Definition bl1_conjm.c:72
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:286
void bl1_zgemm_blas(trans1_t transa, trans1_t transb, int m, int n, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition bl1_gemm.c:1336
void bl1_zscalm(conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
Definition bl1_scalm.c:273
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmt.c:127
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
void bl1_zcreate_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigm.c:115
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition bl1_allocm.c:45
void bl1_zfree(dcomplex *p)
Definition bl1_free.c:45
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_contigm.c:61
void bl1_zfree_saved_contigm(int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigm.c:82
Definition blis_type_defs.h:138

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zconjm(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zero_dim3(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zgemm_blas(), bl1_zscalm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

◆ bl1_zgemm_blas()

void bl1_zgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
dcomplex * alpha,
dcomplex * a,
int  lda,
dcomplex * b,
int  ldb,
dcomplex * beta,
dcomplex * c,
int  ldc 
)
1337{
1338#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1342
1345
1349 m,
1350 n,
1351 k,
1352 alpha,
1353 a, lda,
1354 b, ldb,
1355 beta,
1356 c, ldc );
1357#else
1358 char blas_transa;
1359 char blas_transb;
1360
1363
1365 &blas_transb,
1366 &m,
1367 &n,
1368 &k,
1369 alpha,
1370 a, &lda,
1371 b, &ldb,
1372 beta,
1373 c, &ldc );
1374#endif
1375}
void F77_zgemm(char *transa, char *transb, int *m, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), cblas_zgemm(), CblasColMajor, and F77_zgemm().

Referenced by bl1_zgemm().

◆ bl1_zhemm()

void bl1_zhemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs,
dcomplex * beta,
dcomplex * c,
int  c_rs,
int  c_cs 
)
349{
350 int m_save = m;
351 int n_save = n;
352 dcomplex* a_save = a;
353 dcomplex* b_save = b;
354 dcomplex* c_save = c;
355 int a_rs_save = a_rs;
356 int a_cs_save = a_cs;
357 int b_rs_save = b_rs;
358 int b_cs_save = b_cs;
359 int c_rs_save = c_rs;
360 int c_cs_save = c_cs;
361 dcomplex zero = bl1_z0();
362 dcomplex one = bl1_z1();
366 int dim_a;
367 int lda, inca;
368 int ldb, incb;
369 int ldc, incc;
370 int lda_conj, inca_conj;
371 int ldb_copy, incb_copy;
377 int a_was_copied;
378
379 // Return early if possible.
380 if ( bl1_zero_dim2( m, n ) ) return;
381
382 // If necessary, allocate, initialize, and use a temporary contiguous
383 // copy of each matrix rather than the original matrices.
386 dim_a,
387 dim_a,
389 &a, &a_rs, &a_cs );
390
392 n,
394 &b, &b_rs, &b_cs );
395
397 n,
399 &c, &c_rs, &c_cs );
400
401 // Figure out whether A was copied to contiguous memory. This is used to
402 // prevent redundant copying.
403 a_was_copied = ( a != a_save );
404
405 // Initialize with values assuming column-major storage.
406 lda = a_cs;
407 inca = a_rs;
408 ldb = b_cs;
409 incb = b_rs;
410 ldc = c_cs;
411 incc = c_rs;
412
413 // Adjust the parameters based on the storage of each matrix.
414 if ( bl1_is_col_storage( c_rs, c_cs ) )
415 {
416 if ( bl1_is_col_storage( a_rs, a_cs ) )
417 {
418 if ( bl1_is_col_storage( b_rs, b_cs ) )
419 {
420 // requested operation: C_c += uplo( A_c ) * B_c
421 // effective operation: C_c += uplo( A_c ) * B_c
422 }
423 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
424 {
425 // requested operation: C_c += uplo( A_c ) * B_r
426 // effective operation: C_c += uplo( A_c ) * B_c
428 }
429 }
430 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
431 {
432 if ( bl1_is_col_storage( b_rs, b_cs ) )
433 {
434 // requested operation: C_c += uplo( A_r ) * B_c
435 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
437
438 bl1_toggle_uplo( uplo );
439
441 }
442 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
443 {
444 // requested operation: C_c += uplo( A_r ) * B_r
445 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
448
450 bl1_toggle_uplo( uplo );
451
453 }
454 }
455 }
456 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
457 {
458 if ( bl1_is_col_storage( a_rs, a_cs ) )
459 {
460 if ( bl1_is_col_storage( b_rs, b_cs ) )
461 {
462 // requested operation: C_r += uplo( A_c ) * B_c
463 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
465
466 bl1_swap_ints( m, n );
467
469 }
470 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
471 {
472 // requested operation: C_r += uplo( A_c ) * B_r
473 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
476
477 bl1_swap_ints( m, n );
478
480
482 }
483 }
484 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
485 {
486 if ( bl1_is_col_storage( b_rs, b_cs ) )
487 {
488 // requested operation: C_r += uplo( A_r ) * B_c
489 // effective operation: C_c += B_c^T * ~uplo( A_c )
492
493 bl1_swap_ints( m, n );
494
496 bl1_toggle_uplo( uplo );
497
500 }
501 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
502 {
503 // requested operation: C_r += uplo( A_r ) * B_r
504 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
508
509 bl1_swap_ints( m, n );
510
511 bl1_toggle_uplo( uplo );
513 }
514 }
515 }
516
517 // We need a temporary matrix for the cases where A is conjugated.
518 a_conj = a;
519 lda_conj = lda;
520 inca_conj = inca;
521
523 {
524 int dim_a;
525
527
529 lda_conj = dim_a;
530 inca_conj = 1;
531
532 bl1_zcopymrt( uplo,
534 dim_a,
535 dim_a,
536 a, inca, lda,
538 }
539 else if ( hemm_needs_conja && a_was_copied )
540 {
541 int dim_a;
542
544
545 bl1_zconjmr( uplo,
546 dim_a,
547 dim_a,
549 }
550
551 // We need a temporary matrix for the cases where B needs to be copied.
552 b_copy = b;
553 ldb_copy = ldb;
554 incb_copy = incb;
555
556 // There are two cases where we need to make a copy of B: one where the
557 // copy's dimensions are transposed from the original B, and one where
558 // the dimensions are not swapped.
559 if ( hemm_needs_copyb )
560 {
562
563 // Set transb, which determines whether or not we need to copy from B
564 // as if it needs a transposition. If a transposition is needed, then
565 // m and n have already been swapped. So in either case m
566 // represents the leading dimension of the copy.
569
570 b_copy = bl1_zallocm( m, n );
571 ldb_copy = m;
572 incb_copy = 1;
573
575 m,
576 n,
577 b, incb, ldb,
579 }
580
581 // There are two cases where we need to perform the hemm and then axpy
582 // the result into C with a transposition. We handle those cases here.
583 if ( hemm_needs_axpyt )
584 {
585 // We need a temporary matrix for holding C^T. Notice that m and n
586 // represent the dimensions of C, and thus C_trans is n-by-m
587 // (interpreting both as column-major matrices). So the leading
588 // dimension of the temporary matrix holding C^T is n.
589 c_trans = bl1_zallocm( n, m );
590 ldc_trans = n;
591 incc_trans = 1;
592
593 // Compute A * B (or B * A) and store the result in C_trans.
594 // Note that there is no overlap between the axpyt cases and
595 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
597 uplo,
598 n,
599 m,
600 alpha,
601 a, lda,
602 b, ldb,
603 &zero,
605
606 // Scale C by beta.
608 m,
609 n,
610 beta,
611 c, incc, ldc );
612
613 // And finally, accumulate the matrix product in C_trans into C
614 // with a transpose.
616 m,
617 n,
618 &one,
620 c, incc, ldc );
621
622 // Free the temporary matrix for C.
624 }
625 else // no extra axpyt step needed
626 {
628 uplo,
629 m,
630 n,
631 alpha,
634 beta,
635 c, ldc );
636 }
637
639 bl1_zfree( a_conj );
640
641 if ( hemm_needs_copyb )
642 bl1_zfree( b_copy );
643
644 // Free any temporary contiguous matrices, copying the result back to
645 // the original matrix.
647 &a, &a_rs, &a_cs );
648
650 &b, &b_rs, &b_cs );
651
653 n_save,
655 &c, &c_rs, &c_cs );
656}
void bl1_zconjmr(uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
Definition bl1_conjmr.c:79
void bl1_zcopymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_copymrt.c:328
void bl1_zhemm_blas(side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition bl1_hemm.c:699
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_create_contigmr.c:109

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zhemm_blas(), bl1_zscalm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Hemm_external().

◆ bl1_zhemm_blas()

void bl1_zhemm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  lda,
dcomplex * b,
int  ldb,
dcomplex * beta,
dcomplex * c,
int  ldc 
)
700{
701#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
705
708
712 m,
713 n,
714 alpha,
715 a, lda,
716 b, ldb,
717 beta,
718 c, ldc );
719#else
720 char blas_side;
721 char blas_uplo;
722
725
727 &blas_uplo,
728 &m,
729 &n,
730 alpha,
731 a, &lda,
732 b, &ldb,
733 beta,
734 c, &ldc );
735#endif
736}
void F77_zhemm(char *side, char *uplo, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zhemm(), CblasColMajor, and F77_zhemm().

Referenced by bl1_zhemm().

◆ bl1_zher2k()

void bl1_zher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs,
double * beta,
dcomplex * c,
int  c_rs,
int  c_cs 
)
335{
336 uplo1_t uplo_save = uplo;
337 int m_save = m;
338 dcomplex* a_save = a;
339 dcomplex* b_save = b;
340 dcomplex* c_save = c;
341 int a_rs_save = a_rs;
342 int a_cs_save = a_cs;
343 int b_rs_save = b_rs;
344 int b_cs_save = b_cs;
345 int c_rs_save = c_rs;
346 int c_cs_save = c_cs;
347 double zero_r = bl1_d0();
348 dcomplex one = bl1_z1();
353 int lda, inca;
354 int ldb, incb;
355 int ldc, incc;
356 int lda_copy, inca_copy;
357 int ldb_copy, incb_copy;
358 int ldc_conj, incc_conj;
363
364 // Return early if possible.
365 if ( bl1_zero_dim2( m, k ) ) return;
366
367 // If necessary, allocate, initialize, and use a temporary contiguous
368 // copy of each matrix rather than the original matrices.
370 m,
371 k,
373 &a, &a_rs, &a_cs );
374
376 m,
377 k,
379 &b, &b_rs, &b_cs );
380
382 m,
383 m,
385 &c, &c_rs, &c_cs );
386
387 // Initialize with values assuming column-major storage.
388 lda = a_cs;
389 inca = a_rs;
390 ldb = b_cs;
391 incb = b_rs;
392 ldc = c_cs;
393 incc = c_rs;
394
395 // Adjust the parameters based on the storage of each matrix.
396 if ( bl1_is_col_storage( c_rs, c_cs ) )
397 {
398 if ( bl1_is_col_storage( a_rs, a_cs ) )
399 {
400 if ( bl1_is_col_storage( b_rs, b_cs ) )
401 {
402 // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
403 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
404 }
405 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
406 {
407 // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
408 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
410 }
411 }
412 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
413 {
414 if ( bl1_is_col_storage( b_rs, b_cs ) )
415 {
416 // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
417 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
419 }
420 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
421 {
422 // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
423 // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
426
428
431 }
432 }
433 }
434 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
435 {
436 if ( bl1_is_col_storage( a_rs, a_cs ) )
437 {
438 if ( bl1_is_col_storage( b_rs, b_cs ) )
439 {
440 // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
441 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
443
444 bl1_toggle_uplo( uplo );
445
447 }
448 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
449 {
450 // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
451 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
453
455
456 bl1_toggle_uplo( uplo );
457
459 }
460 }
461 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
462 {
463 if ( bl1_is_col_storage( b_rs, b_cs ) )
464 {
465 // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
466 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
468
470
471 bl1_toggle_uplo( uplo );
472
474 }
475 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
476 {
477 // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
478 // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
482
483 bl1_toggle_uplo( uplo );
485
487 }
488 }
489 }
490
491 // Make a copy of alpha and conjugate if necessary.
492 alpha_copy = *alpha;
494 {
496 }
497
498 a_copy = a;
499 lda_copy = lda;
500 inca_copy = inca;
501
502 // There are two cases where we need to copy A to column-major storage.
503 // We handle those two cases here.
504 if ( her2k_needs_copya )
505 {
506 int m_a;
507 int n_a;
508
509 // Determine the dimensions of A according to the value of trans. We
510 // need this in order to set the leading dimension of the copy of A.
512
513 // We need a temporary matrix to hold a column-major copy of A.
514 a_copy = bl1_zallocm( m, k );
515 lda_copy = m_a;
516 inca_copy = 1;
517
518 // Copy the contents of A into A_copy.
520 m_a,
521 n_a,
522 a, inca, lda,
524 }
525
526 b_copy = b;
527 ldb_copy = ldb;
528 incb_copy = incb;
529
530 // There are two cases where we need to copy B to column-major storage.
531 // We handle those two cases here.
532 if ( her2k_needs_copyb )
533 {
534 int m_b;
535 int n_b;
536
537 // Determine the dimensions of B according to the value of trans. We
538 // need this in order to set the leading dimension of the copy of B.
540
541 // We need a temporary matrix to hold a column-major copy of B.
542 b_copy = bl1_zallocm( m, k );
543 ldb_copy = m_b;
544 incb_copy = 1;
545
546 // Copy the contents of B into B_copy.
548 m_b,
549 n_b,
550 b, incb, ldb,
552 }
553
554 // There are two cases where we need to perform the rank-2k product and
555 // then axpy the result into C with a conjugation. We handle those two
556 // cases here.
557 if ( her2k_needs_conj )
558 {
559 // We need a temporary matrix for holding the rank-2k product.
560 c_conj = bl1_zallocm( m, m );
561 ldc_conj = m;
562 incc_conj = 1;
563
564 // Compute the rank-2k product.
565 bl1_zher2k_blas( uplo,
566 trans,
567 m,
568 k,
569 &alpha_copy,
572 &zero_r,
573 c_conj, ldc_conj );
574
575 // Scale C by beta.
576 bl1_zdscalmr( uplo,
577 m,
578 m,
579 beta,
580 c, incc, ldc );
581
582 // And finally, accumulate the rank-2k product in C_conj into C
583 // with a conjugation.
584 bl1_zaxpymrt( uplo,
586 m,
587 m,
588 &one,
590 c, incc, ldc );
591
592 // Free the temporary matrix for C.
593 bl1_zfree( c_conj );
594 }
595 else
596 {
597 bl1_zher2k_blas( uplo,
598 trans,
599 m,
600 k,
601 &alpha_copy,
604 beta,
605 c, ldc );
606 }
607
608 if ( her2k_needs_copya )
609 bl1_zfree( a_copy );
610
611 if ( her2k_needs_copyb )
612 bl1_zfree( b_copy );
613
614 // Free any temporary contiguous matrices, copying the result back to
615 // the original matrix.
617 &a, &a_rs, &a_cs );
618
620 &b, &b_rs, &b_cs );
621
623 m_save,
624 m_save,
626 &c, &c_rs, &c_cs );
627}
void bl1_zaxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_axpymrt.c:334
void bl1_zher2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc)
Definition bl1_her2k.c:670
void bl1_zdscalmr(uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
Definition bl1_scalmr.c:237
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition bl1_free_saved_contigmr.c:82

References bl1_d0(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcopymt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zher2k_blas(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

◆ bl1_zher2k_blas()

void bl1_zher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex * alpha,
dcomplex * a,
int  lda,
dcomplex * b,
int  ldb,
double * beta,
dcomplex * c,
int  ldc 
)
671{
672#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
676
679
683 m,
684 k,
685 alpha,
686 a, lda,
687 b, ldb,
688 *beta,
689 c, ldc );
690#else
691 char blas_uplo;
692 char blas_trans;
693
696
698 &blas_trans,
699 &m,
700 &k,
701 alpha,
702 a, &lda,
703 b, &ldb,
704 beta,
705 c, &ldc );
706#endif
707}
void F77_zher2k(char *uplo, char *transa, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, double *beta, dcomplex *c, int *ldc)
void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zher2k(), CblasColMajor, and F77_zher2k().

Referenced by bl1_zher2k().

◆ bl1_zherk()

void bl1_zherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
double * beta,
dcomplex * c,
int  c_rs,
int  c_cs 
)
185{
186 uplo1_t uplo_save = uplo;
187 int m_save = m;
188 dcomplex* a_save = a;
189 dcomplex* c_save = c;
190 int a_rs_save = a_rs;
191 int a_cs_save = a_cs;
192 int c_rs_save = c_rs;
193 int c_cs_save = c_cs;
194 double zero_r = bl1_d0();
195 dcomplex one = bl1_z1();
197 int lda, inca;
198 int ldc, incc;
199 int ldc_conj, incc_conj;
201
202 // Return early if possible.
203 if ( bl1_zero_dim2( m, k ) ) return;
204
205 // If necessary, allocate, initialize, and use a temporary contiguous
206 // copy of each matrix rather than the original matrices.
208 m,
209 k,
211 &a, &a_rs, &a_cs );
212
214 m,
215 m,
217 &c, &c_rs, &c_cs );
218
219 // Initialize with values assuming column-major storage.
220 lda = a_cs;
221 inca = a_rs;
222 ldc = c_cs;
223 incc = c_rs;
224
225 // Adjust the parameters based on the storage of each matrix.
226 if ( bl1_is_col_storage( c_rs, c_cs ) )
227 {
228 if ( bl1_is_col_storage( a_rs, a_cs ) )
229 {
230 // requested operation: uplo( C_c ) += A_c * A_c'
231 // effective operation: uplo( C_c ) += A_c * A_c'
232 }
233 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
234 {
235 // requested operation: uplo( C_c ) += A_r * A_r'
236 // effective operation: uplo( C_c ) += conj( A_c' * A_c )
238
240
242 }
243 }
244 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
245 {
246 if ( bl1_is_col_storage( a_rs, a_cs ) )
247 {
248 // requested operation: uplo( C_r ) += A_c * A_c'
249 // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
251
252 bl1_toggle_uplo( uplo );
253
255 }
256 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
257 {
258 // requested operation: uplo( C_r ) += A_r * A_r'
259 // effective operation: ~uplo( C_c ) += A_c' * A_c
262
263 bl1_toggle_uplo( uplo );
265 }
266 }
267
268 // There are two cases where we need to perform the rank-k product and
269 // then axpy the result into C with a conjugation. We handle those two
270 // cases here.
271 if ( herk_needs_conj )
272 {
273 // We need a temporary matrix for holding the rank-k product.
274 c_conj = bl1_zallocm( m, m );
275 ldc_conj = m;
276 incc_conj = 1;
277
278 // Compute the rank-k product.
279 bl1_zherk_blas( uplo,
280 trans,
281 m,
282 k,
283 alpha,
284 a, lda,
285 &zero_r,
286 c_conj, ldc_conj );
287
288 // Scale C by beta.
289 bl1_zdscalmr( uplo,
290 m,
291 m,
292 beta,
293 c, incc, ldc );
294
295 // And finally, accumulate the rank-k product in C_conj into C
296 // with a conjugation.
297 bl1_zaxpymrt( uplo,
299 m,
300 m,
301 &one,
303 c, incc, ldc );
304
305 // Free the temporary matrix for C.
306 bl1_zfree( c_conj );
307 }
308 else
309 {
310 bl1_zherk_blas( uplo,
311 trans,
312 m,
313 k,
314 alpha,
315 a, lda,
316 beta,
317 c, ldc );
318 }
319
320 // Free any temporary contiguous matrices, copying the result back to
321 // the original matrix.
323 &a, &a_rs, &a_cs );
324
326 m_save,
327 m_save,
329 &c, &c_rs, &c_cs );
330}
void bl1_zherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
Definition bl1_herk.c:371

References bl1_d0(), bl1_is_col_storage(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zherk_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().

◆ bl1_zherk_blas()

void bl1_zherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double * alpha,
dcomplex * a,
int  lda,
double * beta,
dcomplex * c,
int  ldc 
)
372{
373#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
377
380
384 m,
385 k,
386 *alpha,
387 a, lda,
388 *beta,
389 c, ldc );
390#else
391 char blas_uplo;
392 char blas_trans;
393
396
398 &blas_trans,
399 &m,
400 &k,
401 alpha,
402 a, &lda,
403 beta,
404 c, &ldc );
405#endif
406}
void F77_zherk(char *uplo, char *transa, int *n, int *k, double *alpha, dcomplex *a, int *lda, double *beta, dcomplex *c, int *ldc)
void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().

Referenced by bl1_zherk().

◆ bl1_zsymm()

void bl1_zsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  a_rs,
int  a_cs,
dcomplex * b,
int  b_rs,
int  b_cs,
dcomplex * beta,
dcomplex * c,
int  c_rs,
int  c_cs 
)
797{
798 int m_save = m;
799 int n_save = n;
800 dcomplex* a_save = a;
801 dcomplex* b_save = b;
802 dcomplex* c_save = c;
803 int a_rs_save = a_rs;
804 int a_cs_save = a_cs;
805 int b_rs_save = b_rs;
806 int b_cs_save = b_cs;
807 int c_rs_save = c_rs;
808 int c_cs_save = c_cs;
809 dcomplex zero = bl1_z0();
810 dcomplex one = bl1_z1();
813 int dim_a;
814 int lda, inca;
815 int ldb, incb;
816 int ldc, incc;
817 int ldb_copy, incb_copy;
822
823 // Return early if possible.
824 if ( bl1_zero_dim2( m, n ) ) return;
825
826 // If necessary, allocate, initialize, and use a temporary contiguous
827 // copy of each matrix rather than the original matrices.
830 dim_a,
831 dim_a,
833 &a, &a_rs, &a_cs );
834
836 n,
838 &b, &b_rs, &b_cs );
839
841 n,
843 &c, &c_rs, &c_cs );
844
845 // Initialize with values assuming column-major storage.
846 lda = a_cs;
847 inca = a_rs;
848 ldb = b_cs;
849 incb = b_rs;
850 ldc = c_cs;
851 incc = c_rs;
852
853 // Adjust the parameters based on the storage of each matrix.
854 if ( bl1_is_col_storage( c_rs, c_cs ) )
855 {
856 if ( bl1_is_col_storage( a_rs, a_cs ) )
857 {
858 if ( bl1_is_col_storage( b_rs, b_cs ) )
859 {
860 // requested operation: C_c += uplo( A_c ) * B_c
861 // effective operation: C_c += uplo( A_c ) * B_c
862 }
863 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
864 {
865 // requested operation: C_c += uplo( A_c ) * B_r
866 // effective operation: C_c += uplo( A_c ) * B_c
868 }
869 }
870 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
871 {
872 if ( bl1_is_col_storage( b_rs, b_cs ) )
873 {
874 // requested operation: C_c += uplo( A_r ) * B_c
875 // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
877
878 bl1_toggle_uplo( uplo );
879 }
880 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
881 {
882 // requested operation: C_c += uplo( A_r ) * B_r
883 // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
886
888 bl1_toggle_uplo( uplo );
889
891 }
892 }
893 }
894 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
895 {
896 if ( bl1_is_col_storage( a_rs, a_cs ) )
897 {
898 if ( bl1_is_col_storage( b_rs, b_cs ) )
899 {
900 // requested operation: C_r += uplo( A_c ) * B_c
901 // effective operation: C_c += ( uplo( A_c ) * B_c )^T
903
904 bl1_swap_ints( m, n );
905
907 }
908 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
909 {
910 // requested operation: C_r += uplo( A_c ) * B_r
911 // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
914
915 bl1_swap_ints( m, n );
916
918 }
919 }
920 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
921 {
922 if ( bl1_is_col_storage( b_rs, b_cs ) )
923 {
924 // requested operation: C_r += uplo( A_r ) * B_c
925 // effective operation: C_c += B_c^T * ~uplo( A_c )
928
929 bl1_swap_ints( m, n );
930
932 bl1_toggle_uplo( uplo );
933
936 }
937 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
938 {
939 // requested operation: C_r += uplo( A_r ) * B_r
940 // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
944
945 bl1_swap_ints( m, n );
946
947 bl1_toggle_uplo( uplo );
949 }
950 }
951 }
952
953 // We need a temporary matrix for the cases where B needs to be copied.
954 b_copy = b;
955 ldb_copy = ldb;
956 incb_copy = incb;
957
958 // There are two cases where we need to make a copy of B: one where the
959 // copy's dimensions are transposed from the original B, and one where
960 // the dimensions are not swapped.
961 if ( symm_needs_copyb )
962 {
964
965 // Set transb, which determines whether or not we need to copy from B
966 // as if it needs a transposition. If a transposition is needed, then
967 // m and n have already been swapped. So in either case m
968 // represents the leading dimension of the copy.
971
972 b_copy = bl1_zallocm( m, n );
973 ldb_copy = m;
974 incb_copy = 1;
975
977 m,
978 n,
979 b, incb, ldb,
981 }
982
983 // There are two cases where we need to perform the symm and then axpy
984 // the result into C with a transposition. We handle those cases here.
985 if ( symm_needs_axpyt )
986 {
987 // We need a temporary matrix for holding C^T. Notice that m and n
988 // represent the dimensions of C, and thus C_trans is n-by-m
989 // (interpreting both as column-major matrices). So the leading
990 // dimension of the temporary matrix holding C^T is n.
991 c_trans = bl1_zallocm( n, m );
992 ldc_trans = n;
993 incc_trans = 1;
994
995 // Compute A * B (or B * A) and store the result in C_trans.
996 // Note that there is no overlap between the axpyt cases and
997 // the conja/copyb cases, hence the use of a, b, lda, and ldb.
999 uplo,
1000 n,
1001 m,
1002 alpha,
1003 a, lda,
1004 b, ldb,
1005 &zero,
1006 c_trans, ldc_trans );
1007
1008 // Scale C by beta.
1010 m,
1011 n,
1012 beta,
1013 c, incc, ldc );
1014
1015 // And finally, accumulate the matrix product in C_trans into C
1016 // with a transpose.
1018 m,
1019 n,
1020 &one,
1022 c, incc, ldc );
1023
1024 // Free the temporary matrix for C.
1025 bl1_zfree( c_trans );
1026 }
1027 else // no extra axpyt step needed
1028 {
1030 uplo,
1031 m,
1032 n,
1033 alpha,
1034 a, lda,
1036 beta,
1037 c, ldc );
1038 }
1039
1040 if ( symm_needs_copyb )
1041 bl1_zfree( b_copy );
1042
1043 // Free any temporary contiguous matrices, copying the result back to
1044 // the original matrix.
1046 &a, &a_rs, &a_cs );
1047
1049 &b, &b_rs, &b_cs );
1050
1052 n_save,
1054 &c, &c_rs, &c_cs );
1055}
void bl1_zsymm_blas(side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition bl1_symm.c:1176

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_zsymm_blas(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

◆ bl1_zsymm_blas()

void bl1_zsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex * alpha,
dcomplex * a,
int  lda,
dcomplex * b,
int  ldb,
dcomplex * beta,
dcomplex * c,
int  ldc 
)
1177{
1178#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1182
1185
1187 cblas_side,
1188 cblas_uplo,
1189 m,
1190 n,
1191 alpha,
1192 a, lda,
1193 b, ldb,
1194 beta,
1195 c, ldc );
1196#else
1197 char blas_side;
1198 char blas_uplo;
1199
1202
1204 &blas_uplo,
1205 &m,
1206 &n,
1207 alpha,
1208 a, &lda,
1209 b, &ldb,
1210 beta,
1211 c, &ldc );
1212#endif
1213}
void F77_zsymm(char *side, char *uplo, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().

Referenced by bl1_zsymm().

◆ bl1_zsyr2k()

void bl1_zsyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs,
dcomplex *beta,
dcomplex *c,
int  c_rs,
int  c_cs 
)
692{
693 uplo1_t uplo_save = uplo;
694 int m_save = m;
695 dcomplex* a_save = a;
696 dcomplex* b_save = b;
697 dcomplex* c_save = c;
698 int a_rs_save = a_rs;
699 int a_cs_save = a_cs;
700 int b_rs_save = b_rs;
701 int b_cs_save = b_cs;
702 int c_rs_save = c_rs;
703 int c_cs_save = c_cs;
706 int lda, inca;
707 int ldb, incb;
708 int ldc, incc;
709 int lda_copy, inca_copy;
710 int ldb_copy, incb_copy;
713
714 // Return early if possible.
715 if ( bl1_zero_dim2( m, k ) ) return;
716
717 // If necessary, allocate, initialize, and use a temporary contiguous
718 // copy of each matrix rather than the original matrices.
720 m,
721 k,
723 &a, &a_rs, &a_cs );
724
726 m,
727 k,
729 &b, &b_rs, &b_cs );
730
732 m,
733 m,
735 &c, &c_rs, &c_cs );
736
737 // Initialize with values assuming column-major storage.
738 lda = a_cs;
739 inca = a_rs;
740 ldb = b_cs;
741 incb = b_rs;
742 ldc = c_cs;
743 incc = c_rs;
744
745 // Adjust the parameters based on the storage of each matrix.
746 if ( bl1_is_col_storage( c_rs, c_cs ) )
747 {
748 if ( bl1_is_col_storage( a_rs, a_cs ) )
749 {
750 if ( bl1_is_col_storage( b_rs, b_cs ) )
751 {
752 // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
753 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
754 }
755 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
756 {
757 // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
758 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
760 }
761 }
762 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
763 {
764 if ( bl1_is_col_storage( b_rs, b_cs ) )
765 {
766 // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
767 // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
769 }
770 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
771 {
772 // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
773 // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
776
778 }
779 }
780 }
781 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
782 {
783 if ( bl1_is_col_storage( a_rs, a_cs ) )
784 {
785 if ( bl1_is_col_storage( b_rs, b_cs ) )
786 {
787 // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
788 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
790
791 bl1_toggle_uplo( uplo );
792 }
793 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
794 {
795 // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
796 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
798
800
801 bl1_toggle_uplo( uplo );
802 }
803 }
804 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
805 {
806 if ( bl1_is_col_storage( b_rs, b_cs ) )
807 {
808 // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
809 // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
811
813
814 bl1_toggle_uplo( uplo );
815 }
816 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
817 {
818 // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
819 // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
823
824 bl1_toggle_uplo( uplo );
826 }
827 }
828 }
829
830 a_copy = a;
831 lda_copy = lda;
832 inca_copy = inca;
833
834 // There are two cases where we need to copy A to column-major storage.
835 // We handle those two cases here.
836 if ( syr2k_needs_copya )
837 {
838 int m_a;
839 int n_a;
840
841 // Determine the dimensions of A according to the value of trans. We
842 // need this in order to set the leading dimension of the copy of A.
844
845 // We need a temporary matrix to hold a column-major copy of A.
846 a_copy = bl1_zallocm( m, k );
847 lda_copy = m_a;
848 inca_copy = 1;
849
850 // Copy the contents of A into A_copy.
852 m_a,
853 n_a,
854 a, inca, lda,
856 }
857
858 b_copy = b;
859 ldb_copy = ldb;
860 incb_copy = incb;
861
862 // There are two cases where we need to copy B to column-major storage.
863 // We handle those two cases here.
864 if ( syr2k_needs_copyb )
865 {
866 int m_b;
867 int n_b;
868
869 // Determine the dimensions of B according to the value of trans. We
870 // need this in order to set the leading dimension of the copy of B.
872
873 // We need a temporary matrix to hold a column-major copy of B.
874 b_copy = bl1_zallocm( m, k );
875 ldb_copy = m_b;
876 incb_copy = 1;
877
878 // Copy the contents of B into B_copy.
880 m_b,
881 n_b,
882 b, incb, ldb,
884 }
885
886 bl1_zsyr2k_blas( uplo,
887 trans,
888 m,
889 k,
890 alpha,
893 beta,
894 c, ldc );
895
896 if ( syr2k_needs_copya )
897 bl1_zfree( a_copy );
898
899 if ( syr2k_needs_copyb )
900 bl1_zfree( b_copy );
901
902 // Free any temporary contiguous matrices, copying the result back to
903 // the original matrix.
905 &a, &a_rs, &a_cs );
906
908 &b, &b_rs, &b_cs );
909
911 m_save,
912 m_save,
914 &c, &c_rs, &c_cs );
915}
void bl1_zsyr2k_blas(uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
Definition bl1_syr2k.c:1060

References bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zallocm(), bl1_zcopymt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zsyr2k_blas(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Syr2k_external().

◆ bl1_zsyr2k_blas()

void bl1_zsyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *alpha,
dcomplex *a,
int  lda,
dcomplex *b,
int  ldb,
dcomplex *beta,
dcomplex *c,
int  ldc 
)
1061{
1062#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
1066
1067 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1068 // so we have to map it down to regular transposition.
1070
1073
1075 cblas_uplo,
1077 m,
1078 k,
1079 alpha,
1080 a, lda,
1081 b, ldb,
1082 beta,
1083 c, ldc );
1084#else
1085 char blas_uplo;
1086 char blas_trans;
1087
1088 // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
1089 // so we have to map it down to regular transposition.
1091
1094
1096 &blas_trans,
1097 &m,
1098 &k,
1099 alpha,
1100 a, &lda,
1101 b, &ldb,
1102 beta,
1103 c, &ldc );
1104#endif
1105}
void F77_zsyr2k(char *uplo, char *transa, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, dcomplex *c, int *ldc)
void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_zsyr2k(), CblasColMajor, and F77_zsyr2k().

Referenced by bl1_zsyr2k().

◆ bl1_zsyrk()

void bl1_zsyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *beta,
dcomplex *c,
int  c_rs,
int  c_cs 
)
302{
303 uplo1_t uplo_save = uplo;
304 int m_save = m;
305 dcomplex* a_save = a;
306 dcomplex* c_save = c;
307 int a_rs_save = a_rs;
308 int a_cs_save = a_cs;
309 int c_rs_save = c_rs;
310 int c_cs_save = c_cs;
311 int lda, inca;
312 int ldc, incc;
313
314 // Return early if possible.
315 if ( bl1_zero_dim2( m, k ) ) return;
316
317 // If necessary, allocate, initialize, and use a temporary contiguous
318 // copy of each matrix rather than the original matrices.
320 m,
321 k,
323 &a, &a_rs, &a_cs );
324
326 m,
327 m,
329 &c, &c_rs, &c_cs );
330
331 // Initialize with values assuming column-major storage.
332 lda = a_cs;
333 inca = a_rs;
334 ldc = c_cs;
335 incc = c_rs;
336
337 // Adjust the parameters based on the storage of each matrix.
338 if ( bl1_is_col_storage( c_rs, c_cs ) )
339 {
340 if ( bl1_is_col_storage( a_rs, a_cs ) )
341 {
342 // requested operation: uplo( C_c ) += A_c * A_c^T
343 // effective operation: uplo( C_c ) += A_c * A_c^T
344 }
345 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
346 {
347 // requested operation: uplo( C_c ) += A_r * A_r^T
348 // effective operation: uplo( C_c ) += A_c^T * A_c
350
352 }
353 }
354 else // if ( bl1_is_row_storage( c_rs, c_cs ) )
355 {
356 if ( bl1_is_col_storage( a_rs, a_cs ) )
357 {
358 // requested operation: uplo( C_r ) += A_c * A_c^T
359 // effective operation: ~uplo( C_c ) += A_c * A_c^T
361
362 bl1_toggle_uplo( uplo );
363 }
364 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
365 {
366 // requested operation: uplo( C_r ) += A_r * A_r^T
367 // effective operation: ~uplo( C_c ) += A_c^T * A_c
370
371 bl1_toggle_uplo( uplo );
373 }
374 }
375
376 bl1_zsyrk_blas( uplo,
377 trans,
378 m,
379 k,
380 alpha,
381 a, lda,
382 beta,
383 c, ldc );
384
385 // Free any temporary contiguous matrices, copying the result back to
386 // the original matrix.
388 &a, &a_rs, &a_cs );
389
391 m_save,
392 m_save,
394 &c, &c_rs, &c_cs );
395}
void bl1_zsyrk_blas(uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *beta, dcomplex *c, int ldc)
Definition bl1_syrk.c:510

References bl1_is_col_storage(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zero_dim2(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), and bl1_zsyrk_blas().

Referenced by FLA_Syrk_external().

◆ bl1_zsyrk_blas()

void bl1_zsyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *alpha,
dcomplex *a,
int  lda,
dcomplex *beta,
dcomplex *c,
int  ldc 
)
511{
512#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
516
519
523 m,
524 k,
525 alpha,
526 a, lda,
527 beta,
528 c, ldc );
529#else
530 char blas_uplo;
531 char blas_trans;
532
535
537 &blas_trans,
538 &m,
539 &k,
540 alpha,
541 a, &lda,
542 beta,
543 c, &ldc );
544#endif
545}
void F77_zsyrk(char *uplo, char *transa, int *n, int *k, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *beta, dcomplex *c, int *ldc)
void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zsyrk(), CblasColMajor, and F77_zsyrk().

Referenced by bl1_zsyrk().

◆ bl1_ztrmm()

void bl1_ztrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs 
)
370{
371 int m_save = m;
372 int n_save = n;
373 dcomplex* a_save = a;
374 dcomplex* b_save = b;
375 int a_rs_save = a_rs;
376 int a_cs_save = a_cs;
377 int b_rs_save = b_rs;
378 int b_cs_save = b_cs;
380 int dim_a;
381 int lda, inca;
382 int ldb, incb;
383 int lda_conj, inca_conj;
384 int a_was_copied;
385
386 // Return early if possible.
387 if ( bl1_zero_dim2( m, n ) ) return;
388
389 // If necessary, allocate, initialize, and use a temporary contiguous
390 // copy of each matrix rather than the original matrices.
393 dim_a,
394 dim_a,
396 &a, &a_rs, &a_cs );
397
399 n,
401 &b, &b_rs, &b_cs );
402
403 // Figure out whether A was copied to contiguous memory. This is used to
404 // prevent redundant copying.
405 a_was_copied = ( a != a_save );
406
407 // Initialize with values assuming column-major storage.
408 lda = a_cs;
409 inca = a_rs;
410 ldb = b_cs;
411 incb = b_rs;
412
413 // Adjust the parameters based on the storage of each matrix.
414 if ( bl1_is_col_storage( b_rs, b_cs ) )
415 {
416 if ( bl1_is_col_storage( a_rs, a_cs ) )
417 {
418 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
419 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
420 }
421 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
422 {
423 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
424 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
426
427 bl1_toggle_uplo( uplo );
429 }
430 }
431 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
432 {
433 if ( bl1_is_col_storage( a_rs, a_cs ) )
434 {
435 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
436 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
438
439 bl1_swap_ints( m, n );
440
443 }
444 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
445 {
446 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
447 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
450
451 bl1_swap_ints( m, n );
452
454 bl1_toggle_uplo( uplo );
455 }
456 }
457
458 // Initialize with values assuming that trans is not conjnotrans.
459 a_conj = a;
460 lda_conj = lda;
461 inca_conj = inca;
462
463 // We want to handle the conjnotrans case. The easiest way to do so is
464 // by making a conjugated copy of A.
466 {
467 int dim_a;
468
470
472 lda_conj = dim_a;
473 inca_conj = 1;
474
475 bl1_zcopymrt( uplo,
477 dim_a,
478 dim_a,
479 a, inca, lda,
481 }
482 else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
483 {
484 int dim_a;
485
487
488 bl1_zconjmr( uplo,
489 dim_a,
490 dim_a,
492 }
493
495 uplo,
496 trans,
497 diag,
498 m,
499 n,
500 alpha,
502 b, ldb );
503
505 bl1_zfree( a_conj );
506
507 // Free any temporary contiguous matrices, copying the result back to
508 // the original matrix.
510 &a, &a_rs, &a_cs );
511
513 n_save,
515 &b, &b_rs, &b_cs );
516}
void bl1_ztrmm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
Definition bl1_trmm.c:661

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zallocm(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_ztrmm_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ztrmmsx(), and FLA_Trmm_external().

◆ bl1_ztrmm_blas()

void bl1_ztrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  lda,
dcomplex *b,
int  ldb 
)
662{
663#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
669
674
680 m,
681 n,
682 alpha,
683 a, lda,
684 b, ldb );
685#else
686 char blas_side;
687 char blas_uplo;
688 char blas_trans;
689 char blas_diag;
690
695
697 &blas_uplo,
698 &blas_trans,
699 &blas_diag,
700 &m,
701 &n,
702 alpha,
703 a, &lda,
704 b, &ldb );
705#endif
706}
void F77_ztrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb)
void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ztrmm(), CblasColMajor, and F77_ztrmm().

Referenced by bl1_ztrmm().

◆ bl1_ztrmmsx()

void bl1_ztrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs,
dcomplex *beta,
dcomplex *c,
int  c_rs,
int  c_cs 
)
332{
333 int m_save = m;
334 int n_save = n;
335 dcomplex* a_save = a;
336 dcomplex* b_save = b;
337 dcomplex* c_save = c;
338 int a_rs_save = a_rs;
339 int a_cs_save = a_cs;
340 int b_rs_save = b_rs;
341 int b_cs_save = b_cs;
342 int c_rs_save = c_rs;
343 int c_cs_save = c_cs;
344 dcomplex one = bl1_z1();
346 int dim_a;
347 int b_copy_rs, b_copy_cs;
348
349 // Return early if possible.
350 if ( bl1_zero_dim2( m, n ) ) return;
351
352 // If necessary, allocate, initialize, and use a temporary contiguous
353 // copy of each matrix rather than the original matrices.
356 dim_a,
357 dim_a,
359 &a, &a_rs, &a_cs );
360
362 n,
364 &b, &b_rs, &b_cs );
365
367 n,
369 &c, &c_rs, &c_cs );
370
371 // Create a copy of B to use in the computation so the original matrix is
372 // left untouched.
373 b_copy = bl1_zallocm( m, n );
374
375 // Match the strides of B_copy to that of B.
376 if ( bl1_is_col_storage( b_rs, b_cs ) )
377 {
378 b_copy_rs = 1;
379 b_copy_cs = m;
380 }
381 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
382 {
383 b_copy_rs = n;
384 b_copy_cs = 1;
385 }
386
387 // Copy the contents of B to B_copy.
389 m,
390 n,
391 b, b_rs, b_cs,
393
394 // Perform the operation on B_copy.
396 uplo,
397 trans,
398 diag,
399 m,
400 n,
401 alpha,
402 a, a_rs, a_cs,
404
405 // Scale C by beta.
407 m,
408 n,
409 beta,
410 c, c_rs, c_cs );
411
412 // Add B_copy into C.
414 m,
415 n,
416 &one,
418 c, c_rs, c_cs );
419
420 // Free the copy of B.
421 bl1_zfree( b_copy );
422
423 // Free any temporary contiguous matrices, copying the result back to
424 // the original matrix.
426 &a, &a_rs, &a_cs );
427
429 &b, &b_rs, &b_cs );
430
432 n_save,
434 &c, &c_rs, &c_cs );
435}
void bl1_ztrmm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_trmm.c:369

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_ztrmm(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

◆ bl1_ztrsm()

void bl1_ztrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs 
)
370{
371 int m_save = m;
372 int n_save = n;
373 dcomplex* a_save = a;
374 dcomplex* b_save = b;
375 int a_rs_save = a_rs;
376 int a_cs_save = a_cs;
377 int b_rs_save = b_rs;
378 int b_cs_save = b_cs;
380 int dim_a;
381 int lda, inca;
382 int ldb, incb;
383 int lda_conj, inca_conj;
384 int a_was_copied;
385
386 // Return early if possible.
387 if ( bl1_zero_dim2( m, n ) ) return;
388
389 // If necessary, allocate, initialize, and use a temporary contiguous
390 // copy of each matrix rather than the original matrices.
393 dim_a,
394 dim_a,
396 &a, &a_rs, &a_cs );
397
399 n,
401 &b, &b_rs, &b_cs );
402
403 // Figure out whether A was copied to contiguous memory. This is used to
404 // prevent redundant copying.
405 a_was_copied = ( a != a_save );
406
407 // Initialize with values assuming column-major storage.
408 lda = a_cs;
409 inca = a_rs;
410 ldb = b_cs;
411 incb = b_rs;
412
413 // Adjust the parameters based on the storage of each matrix.
414 if ( bl1_is_col_storage( b_rs, b_cs ) )
415 {
416 if ( bl1_is_col_storage( a_rs, a_cs ) )
417 {
418 // requested operation: B_c := tr( uplo( A_c ) ) * B_c
419 // effective operation: B_c := tr( uplo( A_c ) ) * B_c
420 }
421 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
422 {
423 // requested operation: B_c := tr( uplo( A_r ) ) * B_c
424 // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
426
427 bl1_toggle_uplo( uplo );
429 }
430 }
431 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
432 {
433 if ( bl1_is_col_storage( a_rs, a_cs ) )
434 {
435 // requested operation: B_r := tr( uplo( A_c ) ) * B_r
436 // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
438
439 bl1_swap_ints( m, n );
440
443 }
444 else // if ( bl1_is_row_storage( a_rs, a_cs ) )
445 {
446 // requested operation: B_r := tr( uplo( A_r ) ) * B_r
447 // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
450
451 bl1_swap_ints( m, n );
452
454 bl1_toggle_uplo( uplo );
455 }
456 }
457
458 // Initialize with values assuming that trans is not conjnotrans.
459 a_conj = a;
460 lda_conj = lda;
461 inca_conj = inca;
462
463 // We want to handle the conjnotrans case. The easiest way to do so is
464 // by making a conjugated copy of A.
466 {
467 int dim_a;
468
470
472 lda_conj = dim_a;
473 inca_conj = 1;
474
475 bl1_zcopymrt( uplo,
477 dim_a,
478 dim_a,
479 a, inca, lda,
481 }
482 else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
483 {
484 int dim_a;
485
487
488 bl1_zconjmr( uplo,
489 dim_a,
490 dim_a,
492 }
493
495 uplo,
496 trans,
497 diag,
498 m,
499 n,
500 alpha,
502 b, ldb );
503
505 bl1_zfree( a_conj );
506
507 // Free any temporary contiguous matrices, copying the result back to
508 // the original matrix.
510 &a, &a_rs, &a_cs );
511
513 n_save,
515 &b, &b_rs, &b_cs );
516}
void bl1_ztrsm_blas(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
Definition bl1_trsm.c:661

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zallocm(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_ztrsm_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ztrsmsx(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_piv_opz_var3(), and FLA_Trsm_external().

◆ bl1_ztrsm_blas()

void bl1_ztrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  lda,
dcomplex *b,
int  ldb 
)
662{
663#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
669
674
680 m,
681 n,
682 alpha,
683 a, lda,
684 b, ldb );
685#else
686 char blas_side;
687 char blas_uplo;
688 char blas_trans;
689 char blas_diag;
690
695
697 &blas_uplo,
698 &blas_trans,
699 &blas_diag,
700 &m,
701 &n,
702 alpha,
703 a, &lda,
704 b, &ldb );
705#endif
706}
void F77_ztrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb)
void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ztrsm(), CblasColMajor, and F77_ztrsm().

Referenced by bl1_ztrsm().

◆ bl1_ztrsmsx()

void bl1_ztrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs,
dcomplex *beta,
dcomplex *c,
int  c_rs,
int  c_cs 
)
332{
333 int m_save = m;
334 int n_save = n;
335 dcomplex* a_save = a;
336 dcomplex* b_save = b;
337 dcomplex* c_save = c;
338 int a_rs_save = a_rs;
339 int a_cs_save = a_cs;
340 int b_rs_save = b_rs;
341 int b_cs_save = b_cs;
342 int c_rs_save = c_rs;
343 int c_cs_save = c_cs;
344 dcomplex one = bl1_z1();
346 int dim_a;
347 int b_copy_rs, b_copy_cs;
348
349 // Return early if possible.
350 if ( bl1_zero_dim2( m, n ) ) return;
351
352 // If necessary, allocate, initialize, and use a temporary contiguous
353 // copy of each matrix rather than the original matrices.
356 dim_a,
357 dim_a,
359 &a, &a_rs, &a_cs );
360
362 n,
364 &b, &b_rs, &b_cs );
365
367 n,
369 &c, &c_rs, &c_cs );
370
371 // Create a copy of B to use in the computation so the original matrix is
372 // left untouched.
373 b_copy = bl1_zallocm( m, n );
374
375 // Match the strides of B_copy to that of B.
376 if ( bl1_is_col_storage( b_rs, b_cs ) )
377 {
378 b_copy_rs = 1;
379 b_copy_cs = m;
380 }
381 else // if ( bl1_is_row_storage( b_rs, b_cs ) )
382 {
383 b_copy_rs = n;
384 b_copy_cs = 1;
385 }
386
387 // Copy the contents of B to B_copy.
389 m,
390 n,
391 b, b_rs, b_cs,
393
394 // Perform the operation on B_copy.
396 uplo,
397 trans,
398 diag,
399 m,
400 n,
401 alpha,
402 a, a_rs, a_cs,
404
405 // Scale C by beta.
407 m,
408 n,
409 beta,
410 c, c_rs, c_cs );
411
412 // Add B_copy into C.
414 m,
415 n,
416 &one,
418 c, c_rs, c_cs );
419
420 // Free the copy of B.
421 bl1_zfree( b_copy );
422
423 // Free any temporary contiguous matrices, copying the result back to
424 // the original matrix.
426 &a, &a_rs, &a_cs );
427
429 &b, &b_rs, &b_cs );
430
432 n_save,
434 &c, &c_rs, &c_cs );
435}
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_trsm.c:369

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_ztrsm(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().