libflame revision_anchor
Functions
FLA_Apply_G_rf_asm_var6.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var6()

FLA_Error FLA_Apply_G_rf_asc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
680{
681 float one = bl1_s1();
682 float zero = bl1_s0();
683 float gamma12;
684 float sigma12;
685 float gamma23;
686 float sigma23;
687 scomplex* a1;
688 scomplex* a2;
689 scomplex* a3;
690 scomplex* g12;
691 scomplex* g23;
692 int i, j, g, k;
693 int nG, nG_app;
694 int n_iter;
695 int n_left;
696 int k_minus_1;
697 int n_fuse;
699
700 k_minus_1 = k_G - 1;
701 nG = n_A - 1;
702 n_fuse = 2;
703
704 // Use the simple variant for nG < (k - 1) or k == 1.
705 if ( nG < k_minus_1 || k_G == 1 )
706 {
707 FLA_Apply_G_rf_asc_var1( k_G,
708 m_A,
709 n_A,
710 buff_G, rs_G, cs_G,
711 buff_A, rs_A, cs_A );
712 return FLA_SUCCESS;
713 }
714
715
716 // Start-up phase.
717
718 for ( j = 0; j < k_minus_1; ++j )
719 {
720 nG_app = j + 1;
721 n_iter = nG_app / n_fuse;
722 n_left = nG_app % n_fuse;
723
724 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
725 {
726 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
727 g23 = buff_G + (g )*rs_G + (k )*cs_G;
728 a1 = buff_A + (g - 1)*cs_A;
729 a2 = buff_A + (g )*cs_A;
730 a3 = buff_A + (g + 1)*cs_A;
731
732 gamma12 = g12->real;
733 sigma12 = g12->imag;
734 gamma23 = g23->real;
735 sigma23 = g23->imag;
736
737 is_ident12 = ( gamma12 == one && sigma12 == zero );
738 is_ident23 = ( gamma23 == one && sigma23 == zero );
739
740 if ( !is_ident12 && is_ident23 )
741 {
742 // Apply only to columns 1 and 2.
743
745 &gamma12,
746 &sigma12,
747 a1, 1,
748 a2, 1 );
749 }
750 else if ( is_ident12 && !is_ident23 )
751 {
752 // Apply only to columns 2 and 3.
753
755 &gamma23,
756 &sigma23,
757 a2, 1,
758 a3, 1 );
759 }
760 else if ( !is_ident12 && !is_ident23 )
761 {
762 // Apply to all three columns.
763
765 &gamma12,
766 &sigma12,
767 &gamma23,
768 &sigma23,
769 a1, 1,
770 a2, 1,
771 a3, 1 );
772 }
773 }
774
775 if ( n_left == 1 )
776 {
777 g23 = buff_G + (g )*rs_G + (k )*cs_G;
778 a2 = buff_A + (g )*cs_A;
779 a3 = buff_A + (g + 1)*cs_A;
780
781 gamma23 = g23->real;
782 sigma23 = g23->imag;
783
784 is_ident23 = ( gamma23 == one && sigma23 == zero );
785
786 if ( !is_ident23 )
788 &gamma23,
789 &sigma23,
790 a2, 1,
791 a3, 1 );
792 }
793 }
794
795 // Pipeline stage
796
797 for ( j = k_minus_1; j < nG; ++j )
798 {
799 nG_app = k_G;
800 n_iter = nG_app / n_fuse;
801 n_left = nG_app % n_fuse;
802
803 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
804 {
805 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
806 g23 = buff_G + (g )*rs_G + (k )*cs_G;
807 a1 = buff_A + (g - 1)*cs_A;
808 a2 = buff_A + (g )*cs_A;
809 a3 = buff_A + (g + 1)*cs_A;
810
811 gamma12 = g12->real;
812 sigma12 = g12->imag;
813 gamma23 = g23->real;
814 sigma23 = g23->imag;
815
816 is_ident12 = ( gamma12 == one && sigma12 == zero );
817 is_ident23 = ( gamma23 == one && sigma23 == zero );
818
819 if ( !is_ident12 && is_ident23 )
820 {
821 // Apply only to columns 1 and 2.
822
824 &gamma12,
825 &sigma12,
826 a1, 1,
827 a2, 1 );
828 }
829 else if ( is_ident12 && !is_ident23 )
830 {
831 // Apply only to columns 2 and 3.
832
834 &gamma23,
835 &sigma23,
836 a2, 1,
837 a3, 1 );
838 }
839 else if ( !is_ident12 && !is_ident23 )
840 {
841 // Apply to all three columns.
842
844 &gamma12,
845 &sigma12,
846 &gamma23,
847 &sigma23,
848 a1, 1,
849 a2, 1,
850 a3, 1 );
851 }
852 }
853
854 if ( n_left == 1 )
855 {
856 g23 = buff_G + (g )*rs_G + (k )*cs_G;
857 a2 = buff_A + (g )*cs_A;
858 a3 = buff_A + (g + 1)*cs_A;
859
860 gamma23 = g23->real;
861 sigma23 = g23->imag;
862
863 is_ident23 = ( gamma23 == one && sigma23 == zero );
864
865 if ( !is_ident23 )
867 &gamma23,
868 &sigma23,
869 a2, 1,
870 a3, 1 );
871 }
872 }
873
874 // Shutdown stage
875
876 for ( j = 1; j < k_G; ++j )
877 {
878 nG_app = k_G - j;
879 n_iter = nG_app / n_fuse;
880 n_left = nG_app % n_fuse;
881
882 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
883 {
884 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
885 g23 = buff_G + (g )*rs_G + (k )*cs_G;
886 a1 = buff_A + (g - 1)*cs_A;
887 a2 = buff_A + (g )*cs_A;
888 a3 = buff_A + (g + 1)*cs_A;
889
890 gamma12 = g12->real;
891 sigma12 = g12->imag;
892 gamma23 = g23->real;
893 sigma23 = g23->imag;
894
895 is_ident12 = ( gamma12 == one && sigma12 == zero );
896 is_ident23 = ( gamma23 == one && sigma23 == zero );
897
898 if ( !is_ident12 && is_ident23 )
899 {
900 // Apply only to columns 1 and 2.
901
903 &gamma12,
904 &sigma12,
905 a1, 1,
906 a2, 1 );
907 }
908 else if ( is_ident12 && !is_ident23 )
909 {
910 // Apply only to columns 2 and 3.
911
913 &gamma23,
914 &sigma23,
915 a2, 1,
916 a3, 1 );
917 }
918 else if ( !is_ident12 && !is_ident23 )
919 {
920 // Apply to all three columns.
921
923 &gamma12,
924 &sigma12,
925 &gamma23,
926 &sigma23,
927 a1, 1,
928 a2, 1,
929 a3, 1 );
930 }
931 }
932
933 if ( n_left == 1 )
934 {
935 g23 = buff_G + (g )*rs_G + (k )*cs_G;
936 a2 = buff_A + (g )*cs_A;
937 a3 = buff_A + (g + 1)*cs_A;
938
939 gamma23 = g23->real;
940 sigma23 = g23->imag;
941
942 is_ident23 = ( gamma23 == one && sigma23 == zero );
943
944 if ( !is_ident23 )
946 &gamma23,
947 &sigma23,
948 a2, 1,
949 a3, 1 );
950 }
951 }
952
953 return FLA_SUCCESS;
954}
FLA_Error FLA_Apply_G_rf_asc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:215
int n_left
Definition bl1_axmyv2.c:149
int i
Definition bl1_axmyv2.c:145
float bl1_s0(void)
Definition bl1_constants.c:111
float bl1_s1(void)
Definition bl1_constants.c:47
Definition blis_type_defs.h:133

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blc_var6().

◆ FLA_Apply_G_rf_asd_var6()

FLA_Error FLA_Apply_G_rf_asd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
399{
400 double one = bl1_d1();
401 double zero = bl1_d0();
402 double gamma12;
403 double sigma12;
404 double gamma23;
405 double sigma23;
406 double* a1;
407 double* a2;
408 double* a3;
409 dcomplex* g12;
410 dcomplex* g23;
411 int i, j, g, k;
412 int nG, nG_app;
413 int n_iter;
414 int n_left;
415 int k_minus_1;
416 int n_fuse;
418
419 k_minus_1 = k_G - 1;
420 nG = n_A - 1;
421 n_fuse = 2;
422
423 // Use the simple variant for nG < (k - 1) or k == 1.
424 if ( nG < k_minus_1 || k_G == 1 )
425 {
426 FLA_Apply_G_rf_asd_var1( k_G,
427 m_A,
428 n_A,
429 buff_G, rs_G, cs_G,
430 buff_A, rs_A, cs_A );
431 return FLA_SUCCESS;
432 }
433
434
435 // Start-up phase.
436
437 for ( j = 0; j < k_minus_1; ++j )
438 {
439 nG_app = j + 1;
440 n_iter = nG_app / n_fuse;
441 n_left = nG_app % n_fuse;
442
443 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
444 {
445 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
446 g23 = buff_G + (g )*rs_G + (k )*cs_G;
447 a1 = buff_A + (g - 1)*cs_A;
448 a2 = buff_A + (g )*cs_A;
449 a3 = buff_A + (g + 1)*cs_A;
450
451 gamma12 = g12->real;
452 sigma12 = g12->imag;
453 gamma23 = g23->real;
454 sigma23 = g23->imag;
455
456 is_ident12 = ( gamma12 == one && sigma12 == zero );
457 is_ident23 = ( gamma23 == one && sigma23 == zero );
458
459 if ( !is_ident12 && is_ident23 )
460 {
461 // Apply only to columns 1 and 2.
462
464 &gamma12,
465 &sigma12,
466 a1, 1,
467 a2, 1 );
468 }
469 else if ( is_ident12 && !is_ident23 )
470 {
471 // Apply only to columns 2 and 3.
472
474 &gamma23,
475 &sigma23,
476 a2, 1,
477 a3, 1 );
478 }
479 else if ( !is_ident12 && !is_ident23 )
480 {
481 // Apply to all three columns.
482
484 &gamma12,
485 &sigma12,
486 &gamma23,
487 &sigma23,
488 a1, 1,
489 a2, 1,
490 a3, 1 );
491 }
492 }
493
494 if ( n_left == 1 )
495 {
496 g23 = buff_G + (g )*rs_G + (k )*cs_G;
497 a2 = buff_A + (g )*cs_A;
498 a3 = buff_A + (g + 1)*cs_A;
499
500 gamma23 = g23->real;
501 sigma23 = g23->imag;
502
503 is_ident23 = ( gamma23 == one && sigma23 == zero );
504
505 if ( !is_ident23 )
507 &gamma23,
508 &sigma23,
509 a2, 1,
510 a3, 1 );
511 }
512 }
513
514 // Pipeline stage
515
516 for ( j = k_minus_1; j < nG; ++j )
517 {
518 nG_app = k_G;
519 n_iter = nG_app / n_fuse;
520 n_left = nG_app % n_fuse;
521
522 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
523 {
524 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
525 g23 = buff_G + (g )*rs_G + (k )*cs_G;
526 a1 = buff_A + (g - 1)*cs_A;
527 a2 = buff_A + (g )*cs_A;
528 a3 = buff_A + (g + 1)*cs_A;
529
530 gamma12 = g12->real;
531 sigma12 = g12->imag;
532 gamma23 = g23->real;
533 sigma23 = g23->imag;
534
535 is_ident12 = ( gamma12 == one && sigma12 == zero );
536 is_ident23 = ( gamma23 == one && sigma23 == zero );
537
538 if ( !is_ident12 && is_ident23 )
539 {
540 // Apply only to columns 1 and 2.
541
543 &gamma12,
544 &sigma12,
545 a1, 1,
546 a2, 1 );
547 }
548 else if ( is_ident12 && !is_ident23 )
549 {
550 // Apply only to columns 2 and 3.
551
553 &gamma23,
554 &sigma23,
555 a2, 1,
556 a3, 1 );
557 }
558 else if ( !is_ident12 && !is_ident23 )
559 {
560 // Apply to all three columns.
561
563 &gamma12,
564 &sigma12,
565 &gamma23,
566 &sigma23,
567 a1, 1,
568 a2, 1,
569 a3, 1 );
570 }
571 }
572
573 if ( n_left == 1 )
574 {
575 g23 = buff_G + (g )*rs_G + (k )*cs_G;
576 a2 = buff_A + (g )*cs_A;
577 a3 = buff_A + (g + 1)*cs_A;
578
579 gamma23 = g23->real;
580 sigma23 = g23->imag;
581
582 is_ident23 = ( gamma23 == one && sigma23 == zero );
583
584 if ( !is_ident23 )
586 &gamma23,
587 &sigma23,
588 a2, 1,
589 a3, 1 );
590 }
591 }
592
593 // Shutdown stage
594
595 for ( j = 1; j < k_G; ++j )
596 {
597 nG_app = k_G - j;
598 n_iter = nG_app / n_fuse;
599 n_left = nG_app % n_fuse;
600
601 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
602 {
603 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
604 g23 = buff_G + (g )*rs_G + (k )*cs_G;
605 a1 = buff_A + (g - 1)*cs_A;
606 a2 = buff_A + (g )*cs_A;
607 a3 = buff_A + (g + 1)*cs_A;
608
609 gamma12 = g12->real;
610 sigma12 = g12->imag;
611 gamma23 = g23->real;
612 sigma23 = g23->imag;
613
614 is_ident12 = ( gamma12 == one && sigma12 == zero );
615 is_ident23 = ( gamma23 == one && sigma23 == zero );
616
617 if ( !is_ident12 && is_ident23 )
618 {
619 // Apply only to columns 1 and 2.
620
622 &gamma12,
623 &sigma12,
624 a1, 1,
625 a2, 1 );
626 }
627 else if ( is_ident12 && !is_ident23 )
628 {
629 // Apply only to columns 2 and 3.
630
632 &gamma23,
633 &sigma23,
634 a2, 1,
635 a3, 1 );
636 }
637 else if ( !is_ident12 && !is_ident23 )
638 {
639 // Apply to all three columns.
640
642 &gamma12,
643 &sigma12,
644 &gamma23,
645 &sigma23,
646 a1, 1,
647 a2, 1,
648 a3, 1 );
649 }
650 }
651
652 if ( n_left == 1 )
653 {
654 g23 = buff_G + (g )*rs_G + (k )*cs_G;
655 a2 = buff_A + (g )*cs_A;
656 a3 = buff_A + (g + 1)*cs_A;
657
658 gamma23 = g23->real;
659 sigma23 = g23->imag;
660
661 is_ident23 = ( gamma23 == one && sigma23 == zero );
662
663 if ( !is_ident23 )
665 &gamma23,
666 &sigma23,
667 a2, 1,
668 a3, 1 );
669 }
670 }
671
672 return FLA_SUCCESS;
673}
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:164
double bl1_d0(void)
Definition bl1_constants.c:118
double bl1_d1(void)
Definition bl1_constants.c:54
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bld_var6().

◆ FLA_Apply_G_rf_asm_var6()

FLA_Error FLA_Apply_G_rf_asm_var6 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
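44 rs_G = FLA_Obj_row_stride( G );
45 cs_G = FLA_Obj_col_stride( G );
46
47 rs_A = FLA_Obj_row_stride( A );
48 cs_A = FLA_Obj_col_stride( A );
49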
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
57 FLA_Apply_G_rf_ass_var6( k_G,
58 m_A,
59 n_A,
60 buff_G, rs_G, cs_G,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
71 FLA_Apply_G_rf_asd_var6( k_G,
72 m_A,
73 n_A,
74 buff_G, rs_G, cs_G,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
85 FLA_Apply_G_rf_asc_var6( k_G,
86 m_A,
87 n_A,
88 buff_G, rs_G, cs_G,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
99 FLA_Apply_G_rf_asz_var6( k_G,
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_asc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:675
FLA_Error FLA_Apply_G_rf_asd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:394
FLA_Error FLA_Apply_G_rf_asz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:956
FLA_Error FLA_Apply_G_rf_ass_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var6.c:113
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49

References FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_asz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_ass_var6()

FLA_Error FLA_Apply_G_rf_ass_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 float gamma12;
122 float sigma12;
123 float gamma23;
124 float sigma23;
125 float* a1;
126 float* a2;
127 float* a3;
128 scomplex* g12;
129 scomplex* g23;
130 int i, j, g, k;
131 int nG, nG_app;
132 int n_iter;
133 int n_left;
134 int k_minus_1;
135 int n_fuse;
137
138 k_minus_1 = k_G - 1;
139 nG = n_A - 1;
140 n_fuse = 2;
141
142 // Use the simple variant for nG < (k - 1) or k == 1.
143 if ( nG < k_minus_1 || k_G == 1 )
144 {
145 FLA_Apply_G_rf_ass_var1( k_G,
146 m_A,
147 n_A,
148 buff_G, rs_G, cs_G,
149 buff_A, rs_A, cs_A );
150 return FLA_SUCCESS;
151 }
152
153
154 // Start-up phase.
155
156 for ( j = 0; j < k_minus_1; ++j )
157 {
158 nG_app = j + 1;
159 n_iter = nG_app / n_fuse;
160 n_left = nG_app % n_fuse;
161
162 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
163 {
164 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
165 g23 = buff_G + (g )*rs_G + (k )*cs_G;
166 a1 = buff_A + (g - 1)*cs_A;
167 a2 = buff_A + (g )*cs_A;
168 a3 = buff_A + (g + 1)*cs_A;
169
170 gamma12 = g12->real;
171 sigma12 = g12->imag;
172 gamma23 = g23->real;
173 sigma23 = g23->imag;
174
175 is_ident12 = ( gamma12 == one && sigma12 == zero );
176 is_ident23 = ( gamma23 == one && sigma23 == zero );
177
178 if ( !is_ident12 && is_ident23 )
179 {
180 // Apply only to columns 1 and 2.
181
183 &gamma12,
184 &sigma12,
185 a1, 1,
186 a2, 1 );
187 }
188 else if ( is_ident12 && !is_ident23 )
189 {
190 // Apply only to columns 2 and 3.
191
193 &gamma23,
194 &sigma23,
195 a2, 1,
196 a3, 1 );
197 }
198 else if ( !is_ident12 && !is_ident23 )
199 {
200 // Apply to all three columns.
201
203 &gamma12,
204 &sigma12,
205 &gamma23,
206 &sigma23,
207 a1, 1,
208 a2, 1,
209 a3, 1 );
210 }
211 }
212
213 if ( n_left == 1 )
214 {
215 g23 = buff_G + (g )*rs_G + (k )*cs_G;
216 a2 = buff_A + (g )*cs_A;
217 a3 = buff_A + (g + 1)*cs_A;
218
219 gamma23 = g23->real;
220 sigma23 = g23->imag;
221
222 is_ident23 = ( gamma23 == one && sigma23 == zero );
223
224 if ( !is_ident23 )
226 &gamma23,
227 &sigma23,
228 a2, 1,
229 a3, 1 );
230 }
231 }
232
233 // Pipeline stage
234
235 for ( j = k_minus_1; j < nG; ++j )
236 {
237 nG_app = k_G;
238 n_iter = nG_app / n_fuse;
239 n_left = nG_app % n_fuse;
240
241 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
242 {
243 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
244 g23 = buff_G + (g )*rs_G + (k )*cs_G;
245 a1 = buff_A + (g - 1)*cs_A;
246 a2 = buff_A + (g )*cs_A;
247 a3 = buff_A + (g + 1)*cs_A;
248
249 gamma12 = g12->real;
250 sigma12 = g12->imag;
251 gamma23 = g23->real;
252 sigma23 = g23->imag;
253
254 is_ident12 = ( gamma12 == one && sigma12 == zero );
255 is_ident23 = ( gamma23 == one && sigma23 == zero );
256
257 if ( !is_ident12 && is_ident23 )
258 {
259 // Apply only to columns 1 and 2.
260
262 &gamma12,
263 &sigma12,
264 a1, 1,
265 a2, 1 );
266 }
267 else if ( is_ident12 && !is_ident23 )
268 {
269 // Apply only to columns 2 and 3.
270
272 &gamma23,
273 &sigma23,
274 a2, 1,
275 a3, 1 );
276 }
277 else if ( !is_ident12 && !is_ident23 )
278 {
279 // Apply to all three columns.
280
282 &gamma12,
283 &sigma12,
284 &gamma23,
285 &sigma23,
286 a1, 1,
287 a2, 1,
288 a3, 1 );
289 }
290 }
291
292 if ( n_left == 1 )
293 {
294 g23 = buff_G + (g )*rs_G + (k )*cs_G;
295 a2 = buff_A + (g )*cs_A;
296 a3 = buff_A + (g + 1)*cs_A;
297
298 gamma23 = g23->real;
299 sigma23 = g23->imag;
300
301 is_ident23 = ( gamma23 == one && sigma23 == zero );
302
303 if ( !is_ident23 )
305 &gamma23,
306 &sigma23,
307 a2, 1,
308 a3, 1 );
309 }
310 }
311
312 // Shutdown stage
313
314 for ( j = 1; j < k_G; ++j )
315 {
316 nG_app = k_G - j;
317 n_iter = nG_app / n_fuse;
318 n_left = nG_app % n_fuse;
319
320 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
321 {
322 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
323 g23 = buff_G + (g )*rs_G + (k )*cs_G;
324 a1 = buff_A + (g - 1)*cs_A;
325 a2 = buff_A + (g )*cs_A;
326 a3 = buff_A + (g + 1)*cs_A;
327
328 gamma12 = g12->real;
329 sigma12 = g12->imag;
330 gamma23 = g23->real;
331 sigma23 = g23->imag;
332
333 is_ident12 = ( gamma12 == one && sigma12 == zero );
334 is_ident23 = ( gamma23 == one && sigma23 == zero );
335
336 if ( !is_ident12 && is_ident23 )
337 {
338 // Apply only to columns 1 and 2.
339
341 &gamma12,
342 &sigma12,
343 a1, 1,
344 a2, 1 );
345 }
346 else if ( is_ident12 && !is_ident23 )
347 {
348 // Apply only to columns 2 and 3.
349
351 &gamma23,
352 &sigma23,
353 a2, 1,
354 a3, 1 );
355 }
356 else if ( !is_ident12 && !is_ident23 )
357 {
358 // Apply to all three columns.
359
361 &gamma12,
362 &sigma12,
363 &gamma23,
364 &sigma23,
365 a1, 1,
366 a2, 1,
367 a3, 1 );
368 }
369 }
370
371 if ( n_left == 1 )
372 {
373 g23 = buff_G + (g )*rs_G + (k )*cs_G;
374 a2 = buff_A + (g )*cs_A;
375 a3 = buff_A + (g + 1)*cs_A;
376
377 gamma23 = g23->real;
378 sigma23 = g23->imag;
379
380 is_ident23 = ( gamma23 == one && sigma23 == zero );
381
382 if ( !is_ident23 )
384 &gamma23,
385 &sigma23,
386 a2, 1,
387 a3, 1 );
388 }
389 }
390
391 return FLA_SUCCESS;
392}
FLA_Error FLA_Apply_G_rf_ass_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bls_var6().

◆ FLA_Apply_G_rf_asz_var6()

FLA_Error FLA_Apply_G_rf_asz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
961{
962 double one = bl1_d1();
963 double zero = bl1_d0();
964 double gamma12;
965 double sigma12;
966 double gamma23;
967 double sigma23;
968 dcomplex* a1;
969 dcomplex* a2;
970 dcomplex* a3;
971 dcomplex* g12;
972 dcomplex* g23;
973 int i, j, g, k;
974 int nG, nG_app;
975 int n_iter;
976 int n_left;
977 int k_minus_1;
978 int n_fuse;
980
981 k_minus_1 = k_G - 1;
982 nG = n_A - 1;
983 n_fuse = 2;
984
985 // Use the simple variant for nG < (k - 1) or k == 1.
986 if ( nG < k_minus_1 || k_G == 1 )
987 {
988 FLA_Apply_G_rf_asz_var1( k_G,
989 m_A,
990 n_A,
991 buff_G, rs_G, cs_G,
992 buff_A, rs_A, cs_A );
993 return FLA_SUCCESS;
994 }
995
996
997 // Start-up phase.
998
999 for ( j = 0; j < k_minus_1; ++j )
1000 {
1001 nG_app = j + 1;
1002 n_iter = nG_app / n_fuse;
1003 n_left = nG_app % n_fuse;
1004
1005 //for ( k = 0, g = nG_app - 1; k < nG_app; k += n_fuse, g -= n_fuse )
1006 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1007 {
1008 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1009 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1010 a1 = buff_A + (g - 1)*cs_A;
1011 a2 = buff_A + (g )*cs_A;
1012 a3 = buff_A + (g + 1)*cs_A;
1013
1014 gamma12 = g12->real;
1015 sigma12 = g12->imag;
1016 gamma23 = g23->real;
1017 sigma23 = g23->imag;
1018
1019 is_ident12 = ( gamma12 == one && sigma12 == zero );
1020 is_ident23 = ( gamma23 == one && sigma23 == zero );
1021
1022 if ( !is_ident12 && is_ident23 )
1023 {
1024 // Apply only to columns 1 and 2.
1025
1027 &gamma12,
1028 &sigma12,
1029 a1, 1,
1030 a2, 1 );
1031 }
1032 else if ( is_ident12 && !is_ident23 )
1033 {
1034 // Apply only to columns 2 and 3.
1035
1037 &gamma23,
1038 &sigma23,
1039 a2, 1,
1040 a3, 1 );
1041 }
1042 else if ( !is_ident12 && !is_ident23 )
1043 {
1044 // Apply to all three columns.
1045
1047 &gamma12,
1048 &sigma12,
1049 &gamma23,
1050 &sigma23,
1051 a1, 1,
1052 a2, 1,
1053 a3, 1 );
1054 }
1055 }
1056 //for ( k = 0; k < n_left; k += 1, g -= 1 )
1057 if ( n_left == 1 )
1058 {
1059 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1060 a2 = buff_A + (g )*cs_A;
1061 a3 = buff_A + (g + 1)*cs_A;
1062
1063 gamma23 = g23->real;
1064 sigma23 = g23->imag;
1065
1066 is_ident23 = ( gamma23 == one && sigma23 == zero );
1067
1068 if ( !is_ident23 )
1070 &gamma23,
1071 &sigma23,
1072 a2, 1,
1073 a3, 1 );
1074 }
1075 }
1076
1077 // Pipeline stage
1078
1079 for ( j = k_minus_1; j < nG; ++j )
1080 {
1081 nG_app = k_G;
1082 n_iter = nG_app / n_fuse;
1083 n_left = nG_app % n_fuse;
1084
1085 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1086 {
1087 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1088 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1089 a1 = buff_A + (g - 1)*cs_A;
1090 a2 = buff_A + (g )*cs_A;
1091 a3 = buff_A + (g + 1)*cs_A;
1092
1093 gamma12 = g12->real;
1094 sigma12 = g12->imag;
1095 gamma23 = g23->real;
1096 sigma23 = g23->imag;
1097
1098 is_ident12 = ( gamma12 == one && sigma12 == zero );
1099 is_ident23 = ( gamma23 == one && sigma23 == zero );
1100
1101 if ( !is_ident12 && is_ident23 )
1102 {
1103 // Apply only to columns 1 and 2.
1104
1106 &gamma12,
1107 &sigma12,
1108 a1, 1,
1109 a2, 1 );
1110 }
1111 else if ( is_ident12 && !is_ident23 )
1112 {
1113 // Apply only to columns 2 and 3.
1114
1116 &gamma23,
1117 &sigma23,
1118 a2, 1,
1119 a3, 1 );
1120 }
1121 else if ( !is_ident12 && !is_ident23 )
1122 {
1123 // Apply to all three columns.
1124
1126 &gamma12,
1127 &sigma12,
1128 &gamma23,
1129 &sigma23,
1130 a1, 1,
1131 a2, 1,
1132 a3, 1 );
1133 }
1134 }
1135 //for ( k = 0; k < n_left; k += 1, g -= 1 )
1136 if ( n_left == 1 )
1137 {
1138 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1139 a2 = buff_A + (g )*cs_A;
1140 a3 = buff_A + (g + 1)*cs_A;
1141
1142 gamma23 = g23->real;
1143 sigma23 = g23->imag;
1144
1145 is_ident23 = ( gamma23 == one && sigma23 == zero );
1146
1147 if ( !is_ident23 )
1149 &gamma23,
1150 &sigma23,
1151 a2, 1,
1152 a3, 1 );
1153 }
1154 }
1155
1156 // Shutdown stage
1157
1158 for ( j = 1; j < k_G; ++j )
1159 {
1160 nG_app = k_G - j;
1161 n_iter = nG_app / n_fuse;
1162 n_left = nG_app % n_fuse;
1163
1164 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1165 {
1166 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1167 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1168 a1 = buff_A + (g - 1)*cs_A;
1169 a2 = buff_A + (g )*cs_A;
1170 a3 = buff_A + (g + 1)*cs_A;
1171
1172 gamma12 = g12->real;
1173 sigma12 = g12->imag;
1174 gamma23 = g23->real;
1175 sigma23 = g23->imag;
1176
1177 is_ident12 = ( gamma12 == one && sigma12 == zero );
1178 is_ident23 = ( gamma23 == one && sigma23 == zero );
1179
1180 if ( !is_ident12 && is_ident23 )
1181 {
1182 // Apply only to columns 1 and 2.
1183
1185 &gamma12,
1186 &sigma12,
1187 a1, 1,
1188 a2, 1 );
1189 }
1190 else if ( is_ident12 && !is_ident23 )
1191 {
1192 // Apply only to columns 2 and 3.
1193
1195 &gamma23,
1196 &sigma23,
1197 a2, 1,
1198 a3, 1 );
1199 }
1200 else if ( !is_ident12 && !is_ident23 )
1201 {
1202 // Apply to all three columns.
1203
1205 &gamma12,
1206 &sigma12,
1207 &gamma23,
1208 &sigma23,
1209 a1, 1,
1210 a2, 1,
1211 a3, 1 );
1212 }
1213 }
1214 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
1215 if ( n_left == 1 )
1216 {
1217 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1218 a2 = buff_A + (g )*cs_A;
1219 a3 = buff_A + (g + 1)*cs_A;
1220
1221 gamma23 = g23->real;
1222 sigma23 = g23->imag;
1223
1224 is_ident23 = ( gamma23 == one && sigma23 == zero );
1225
1226 if ( !is_ident23 )
1228 &gamma23,
1229 &sigma23,
1230 a2, 1,
1231 a3, 1 );
1232 }
1233 }
1234
1235 return FLA_SUCCESS;
1236}
FLA_Error FLA_Apply_G_rf_asz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blz_var6().