libflame revision_anchor
Functions
FLA_Apply_G_rf_opt_var6.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_opt_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_opc_var6()

FLA_Error FLA_Apply_G_rf_opc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
scomplex *buff_A,
int  rs_A,
int  cs_A 
)
679{
680 float one = bl1_s1();
681 float zero = bl1_s0();
682 float gamma12;
683 float sigma12;
684 float gamma23;
685 float sigma23;
686 scomplex* a1;
687 scomplex* a2;
688 scomplex* a3;
689 scomplex* g12;
690 scomplex* g23;
691 int i, j, g, k;
692 int nG, nG_app;
693 int n_iter;
694 int n_left;
695 int k_minus_1;
696 int n_fuse;
698
699 k_minus_1 = k_G - 1;
700 nG = n_A - 1;
701 n_fuse = 2;
702
703 // Use the simple variant for nG < (k - 1) or k == 1.
704 if ( nG < k_minus_1 || k_G == 1 )
705 {
706 FLA_Apply_G_rf_opc_var1( k_G,
707 m_A,
708 n_A,
709 buff_G, rs_G, cs_G,
710 buff_A, rs_A, cs_A );
711 return FLA_SUCCESS;
712 }
713
714
715 // Start-up phase.
716
717 for ( j = 0; j < k_minus_1; ++j )
718 {
719 nG_app = j + 1;
720 n_iter = nG_app / n_fuse;
721 n_left = nG_app % n_fuse;
722
723 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
724 {
725 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
726 g23 = buff_G + (g )*rs_G + (k )*cs_G;
727 a1 = buff_A + (g - 1)*cs_A;
728 a2 = buff_A + (g )*cs_A;
729 a3 = buff_A + (g + 1)*cs_A;
730
731 gamma12 = g12->real;
732 sigma12 = g12->imag;
733 gamma23 = g23->real;
734 sigma23 = g23->imag;
735
736 is_ident12 = ( gamma12 == one && sigma12 == zero );
737 is_ident23 = ( gamma23 == one && sigma23 == zero );
738
739 if ( !is_ident12 && is_ident23 )
740 {
741 // Apply only to columns 1 and 2.
742
744 &gamma12,
745 &sigma12,
746 a1, rs_A,
747 a2, rs_A );
748 }
749 else if ( is_ident12 && !is_ident23 )
750 {
751 // Apply only to columns 2 and 3.
752
754 &gamma23,
755 &sigma23,
756 a2, rs_A,
757 a3, rs_A );
758 }
759 else if ( !is_ident12 && !is_ident23 )
760 {
761 // Apply to all three columns.
762
764 &gamma12,
765 &sigma12,
766 &gamma23,
767 &sigma23,
768 a1, rs_A,
769 a2, rs_A,
770 a3, rs_A );
771 }
772 }
773 //for ( k = 0; k < n_left; k += 1, g -= 1 )
774 if ( n_left == 1 )
775 {
776 g23 = buff_G + (g )*rs_G + (k )*cs_G;
777 a2 = buff_A + (g )*cs_A;
778 a3 = buff_A + (g + 1)*cs_A;
779
780 gamma23 = g23->real;
781 sigma23 = g23->imag;
782
783 is_ident23 = ( gamma23 == one && sigma23 == zero );
784
785 if ( !is_ident23 )
787 &gamma23,
788 &sigma23,
789 a2, rs_A,
790 a3, rs_A );
791 }
792 }
793
794 // Pipeline stage
795
796 for ( j = k_minus_1; j < nG; ++j )
797 {
798 nG_app = k_G;
799 n_iter = nG_app / n_fuse;
800 n_left = nG_app % n_fuse;
801
802 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
803 {
804 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
805 g23 = buff_G + (g )*rs_G + (k )*cs_G;
806 a1 = buff_A + (g - 1)*cs_A;
807 a2 = buff_A + (g )*cs_A;
808 a3 = buff_A + (g + 1)*cs_A;
809
810 gamma12 = g12->real;
811 sigma12 = g12->imag;
812 gamma23 = g23->real;
813 sigma23 = g23->imag;
814
815 is_ident12 = ( gamma12 == one && sigma12 == zero );
816 is_ident23 = ( gamma23 == one && sigma23 == zero );
817
818 if ( !is_ident12 && is_ident23 )
819 {
820 // Apply only to columns 1 and 2.
821
823 &gamma12,
824 &sigma12,
825 a1, rs_A,
826 a2, rs_A );
827 }
828 else if ( is_ident12 && !is_ident23 )
829 {
830 // Apply only to columns 2 and 3.
831
833 &gamma23,
834 &sigma23,
835 a2, rs_A,
836 a3, rs_A );
837 }
838 else if ( !is_ident12 && !is_ident23 )
839 {
840 // Apply to all three columns.
841
843 &gamma12,
844 &sigma12,
845 &gamma23,
846 &sigma23,
847 a1, rs_A,
848 a2, rs_A,
849 a3, rs_A );
850 }
851 }
852 //for ( k = 0; k < n_left; k += 1, g -= 1 )
853 if ( n_left == 1 )
854 {
855 g23 = buff_G + (g )*rs_G + (k )*cs_G;
856 a2 = buff_A + (g )*cs_A;
857 a3 = buff_A + (g + 1)*cs_A;
858
859 gamma23 = g23->real;
860 sigma23 = g23->imag;
861
862 is_ident23 = ( gamma23 == one && sigma23 == zero );
863
864 if ( !is_ident23 )
866 &gamma23,
867 &sigma23,
868 a2, rs_A,
869 a3, rs_A );
870 }
871 }
872
873 // Shutdown stage
874
875 for ( j = 1; j < k_G; ++j )
876 {
877 nG_app = k_G - j;
878 n_iter = nG_app / n_fuse;
879 n_left = nG_app % n_fuse;
880
881 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
882 {
883 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
884 g23 = buff_G + (g )*rs_G + (k )*cs_G;
885 a1 = buff_A + (g - 1)*cs_A;
886 a2 = buff_A + (g )*cs_A;
887 a3 = buff_A + (g + 1)*cs_A;
888
889 gamma12 = g12->real;
890 sigma12 = g12->imag;
891 gamma23 = g23->real;
892 sigma23 = g23->imag;
893
894 is_ident12 = ( gamma12 == one && sigma12 == zero );
895 is_ident23 = ( gamma23 == one && sigma23 == zero );
896
897 if ( !is_ident12 && is_ident23 )
898 {
899 // Apply only to columns 1 and 2.
900
902 &gamma12,
903 &sigma12,
904 a1, rs_A,
905 a2, rs_A );
906 }
907 else if ( is_ident12 && !is_ident23 )
908 {
909 // Apply only to columns 2 and 3.
910
912 &gamma23,
913 &sigma23,
914 a2, rs_A,
915 a3, rs_A );
916 }
917 else if ( !is_ident12 && !is_ident23 )
918 {
919 // Apply to all three columns.
920
922 &gamma12,
923 &sigma12,
924 &gamma23,
925 &sigma23,
926 a1, rs_A,
927 a2, rs_A,
928 a3, rs_A );
929 }
930 }
931 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
932 if ( n_left == 1 )
933 {
934 g23 = buff_G + (g )*rs_G + (k )*cs_G;
935 a2 = buff_A + (g )*cs_A;
936 a3 = buff_A + (g + 1)*cs_A;
937
938 gamma23 = g23->real;
939 sigma23 = g23->imag;
940
941 is_ident23 = ( gamma23 == one && sigma23 == zero );
942
943 if ( !is_ident23 )
945 &gamma23,
946 &sigma23,
947 a2, rs_A,
948 a3, rs_A );
949 }
950 }
951
952 return FLA_SUCCESS;
953}
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:215
int n_left
Definition bl1_axmyv2.c:149
int i
Definition bl1_axmyv2.c:145
float bl1_s0(void)
Definition bl1_constants.c:111
float bl1_s1(void)
Definition bl1_constants.c:47
Definition blis_type_defs.h:133

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opd_var6()

FLA_Error FLA_Apply_G_rf_opd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
double *buff_A,
int  rs_A,
int  cs_A 
)
398{
399 double one = bl1_d1();
400 double zero = bl1_d0();
401 double gamma12;
402 double sigma12;
403 double gamma23;
404 double sigma23;
405 double* a1;
406 double* a2;
407 double* a3;
408 dcomplex* g12;
409 dcomplex* g23;
410 int i, j, g, k;
411 int nG, nG_app;
412 int n_iter;
413 int n_left;
414 int k_minus_1;
415 int n_fuse;
417
418 k_minus_1 = k_G - 1;
419 nG = n_A - 1;
420 n_fuse = 2;
421
422 // Use the simple variant for nG < (k - 1) or k == 1.
423 if ( nG < k_minus_1 || k_G == 1 )
424 {
425 FLA_Apply_G_rf_opd_var1( k_G,
426 m_A,
427 n_A,
428 buff_G, rs_G, cs_G,
429 buff_A, rs_A, cs_A );
430 return FLA_SUCCESS;
431 }
432
433
434 // Start-up phase.
435
436 for ( j = 0; j < k_minus_1; ++j )
437 {
438 nG_app = j + 1;
439 n_iter = nG_app / n_fuse;
440 n_left = nG_app % n_fuse;
441
442 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
443 {
444 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
445 g23 = buff_G + (g )*rs_G + (k )*cs_G;
446 a1 = buff_A + (g - 1)*cs_A;
447 a2 = buff_A + (g )*cs_A;
448 a3 = buff_A + (g + 1)*cs_A;
449
450 gamma12 = g12->real;
451 sigma12 = g12->imag;
452 gamma23 = g23->real;
453 sigma23 = g23->imag;
454
455 is_ident12 = ( gamma12 == one && sigma12 == zero );
456 is_ident23 = ( gamma23 == one && sigma23 == zero );
457
458 if ( !is_ident12 && is_ident23 )
459 {
460 // Apply only to columns 1 and 2.
461
463 &gamma12,
464 &sigma12,
465 a1, rs_A,
466 a2, rs_A );
467 }
468 else if ( is_ident12 && !is_ident23 )
469 {
470 // Apply only to columns 2 and 3.
471
473 &gamma23,
474 &sigma23,
475 a2, rs_A,
476 a3, rs_A );
477 }
478 else if ( !is_ident12 && !is_ident23 )
479 {
480 // Apply to all three columns.
481
483 &gamma12,
484 &sigma12,
485 &gamma23,
486 &sigma23,
487 a1, rs_A,
488 a2, rs_A,
489 a3, rs_A );
490 }
491 }
492 //for ( k = 0; k < n_left; k += 1, g -= 1 )
493 if ( n_left == 1 )
494 {
495 g23 = buff_G + (g )*rs_G + (k )*cs_G;
496 a2 = buff_A + (g )*cs_A;
497 a3 = buff_A + (g + 1)*cs_A;
498
499 gamma23 = g23->real;
500 sigma23 = g23->imag;
501
502 is_ident23 = ( gamma23 == one && sigma23 == zero );
503
504 if ( !is_ident23 )
506 &gamma23,
507 &sigma23,
508 a2, rs_A,
509 a3, rs_A );
510 }
511 }
512
513 // Pipeline stage
514
515 for ( j = k_minus_1; j < nG; ++j )
516 {
517 nG_app = k_G;
518 n_iter = nG_app / n_fuse;
519 n_left = nG_app % n_fuse;
520
521 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
522 {
523 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
524 g23 = buff_G + (g )*rs_G + (k )*cs_G;
525 a1 = buff_A + (g - 1)*cs_A;
526 a2 = buff_A + (g )*cs_A;
527 a3 = buff_A + (g + 1)*cs_A;
528
529 gamma12 = g12->real;
530 sigma12 = g12->imag;
531 gamma23 = g23->real;
532 sigma23 = g23->imag;
533
534 is_ident12 = ( gamma12 == one && sigma12 == zero );
535 is_ident23 = ( gamma23 == one && sigma23 == zero );
536
537 if ( !is_ident12 && is_ident23 )
538 {
539 // Apply only to columns 1 and 2.
540
542 &gamma12,
543 &sigma12,
544 a1, rs_A,
545 a2, rs_A );
546 }
547 else if ( is_ident12 && !is_ident23 )
548 {
549 // Apply only to columns 2 and 3.
550
552 &gamma23,
553 &sigma23,
554 a2, rs_A,
555 a3, rs_A );
556 }
557 else if ( !is_ident12 && !is_ident23 )
558 {
559 // Apply to all three columns.
560
562 &gamma12,
563 &sigma12,
564 &gamma23,
565 &sigma23,
566 a1, rs_A,
567 a2, rs_A,
568 a3, rs_A );
569 }
570 }
571 //for ( k = 0; k < n_left; k += 1, g -= 1 )
572 if ( n_left == 1 )
573 {
574 g23 = buff_G + (g )*rs_G + (k )*cs_G;
575 a2 = buff_A + (g )*cs_A;
576 a3 = buff_A + (g + 1)*cs_A;
577
578 gamma23 = g23->real;
579 sigma23 = g23->imag;
580
581 is_ident23 = ( gamma23 == one && sigma23 == zero );
582
583 if ( !is_ident23 )
585 &gamma23,
586 &sigma23,
587 a2, rs_A,
588 a3, rs_A );
589 }
590 }
591
592 // Shutdown stage
593
594 for ( j = 1; j < k_G; ++j )
595 {
596 nG_app = k_G - j;
597 n_iter = nG_app / n_fuse;
598 n_left = nG_app % n_fuse;
599
600 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
601 {
602 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
603 g23 = buff_G + (g )*rs_G + (k )*cs_G;
604 a1 = buff_A + (g - 1)*cs_A;
605 a2 = buff_A + (g )*cs_A;
606 a3 = buff_A + (g + 1)*cs_A;
607
608 gamma12 = g12->real;
609 sigma12 = g12->imag;
610 gamma23 = g23->real;
611 sigma23 = g23->imag;
612
613 is_ident12 = ( gamma12 == one && sigma12 == zero );
614 is_ident23 = ( gamma23 == one && sigma23 == zero );
615
616 if ( !is_ident12 && is_ident23 )
617 {
618 // Apply only to columns 1 and 2.
619
621 &gamma12,
622 &sigma12,
623 a1, rs_A,
624 a2, rs_A );
625 }
626 else if ( is_ident12 && !is_ident23 )
627 {
628 // Apply only to columns 2 and 3.
629
631 &gamma23,
632 &sigma23,
633 a2, rs_A,
634 a3, rs_A );
635 }
636 else if ( !is_ident12 && !is_ident23 )
637 {
638 // Apply to all three columns.
639
641 &gamma12,
642 &sigma12,
643 &gamma23,
644 &sigma23,
645 a1, rs_A,
646 a2, rs_A,
647 a3, rs_A );
648 }
649 }
650 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
651 if ( n_left == 1 )
652 {
653 g23 = buff_G + (g )*rs_G + (k )*cs_G;
654 a2 = buff_A + (g )*cs_A;
655 a3 = buff_A + (g + 1)*cs_A;
656
657 gamma23 = g23->real;
658 sigma23 = g23->imag;
659
660 is_ident23 = ( gamma23 == one && sigma23 == zero );
661
662 if ( !is_ident23 )
664 &gamma23,
665 &sigma23,
666 a2, rs_A,
667 a3, rs_A );
668 }
669 }
670
671 return FLA_SUCCESS;
672}
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:164
double bl1_d0(void)
Definition bl1_constants.c:118
double bl1_d1(void)
Definition bl1_constants.c:54
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_ops_var6()

FLA_Error FLA_Apply_G_rf_ops_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *buff_G,
int  rs_G,
int  cs_G,
float *buff_A,
int  rs_A,
int  cs_A 
)
117{
118 float one = bl1_s1();
119 float zero = bl1_s0();
120 float gamma12;
121 float sigma12;
122 float gamma23;
123 float sigma23;
124 float* a1;
125 float* a2;
126 float* a3;
127 scomplex* g12;
128 scomplex* g23;
129 int i, j, g, k;
130 int nG, nG_app;
131 int n_iter;
132 int n_left;
133 int k_minus_1;
134 int n_fuse;
136
137 k_minus_1 = k_G - 1;
138 nG = n_A - 1;
139 n_fuse = 2;
140
141 // Use the simple variant for nG < (k - 1) or k == 1.
142 if ( nG < k_minus_1 || k_G == 1 )
143 {
144 FLA_Apply_G_rf_ops_var1( k_G,
145 m_A,
146 n_A,
147 buff_G, rs_G, cs_G,
148 buff_A, rs_A, cs_A );
149 return FLA_SUCCESS;
150 }
151
152
153 // Start-up phase.
154
155 for ( j = 0; j < k_minus_1; ++j )
156 {
157 nG_app = j + 1;
158 n_iter = nG_app / n_fuse;
159 n_left = nG_app % n_fuse;
160
161 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
162 {
163 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
164 g23 = buff_G + (g )*rs_G + (k )*cs_G;
165 a1 = buff_A + (g - 1)*cs_A;
166 a2 = buff_A + (g )*cs_A;
167 a3 = buff_A + (g + 1)*cs_A;
168
169 gamma12 = g12->real;
170 sigma12 = g12->imag;
171 gamma23 = g23->real;
172 sigma23 = g23->imag;
173
174 is_ident12 = ( gamma12 == one && sigma12 == zero );
175 is_ident23 = ( gamma23 == one && sigma23 == zero );
176
177 if ( !is_ident12 && is_ident23 )
178 {
179 // Apply only to columns 1 and 2.
180
182 &gamma12,
183 &sigma12,
184 a1, rs_A,
185 a2, rs_A );
186 }
187 else if ( is_ident12 && !is_ident23 )
188 {
189 // Apply only to columns 2 and 3.
190
192 &gamma23,
193 &sigma23,
194 a2, rs_A,
195 a3, rs_A );
196 }
197 else if ( !is_ident12 && !is_ident23 )
198 {
199 // Apply to all three columns.
200
202 &gamma12,
203 &sigma12,
204 &gamma23,
205 &sigma23,
206 a1, rs_A,
207 a2, rs_A,
208 a3, rs_A );
209 }
210 }
211 //for ( k = 0; k < n_left; k += 1, g -= 1 )
212 if ( n_left == 1 )
213 {
214 g23 = buff_G + (g )*rs_G + (k )*cs_G;
215 a2 = buff_A + (g )*cs_A;
216 a3 = buff_A + (g + 1)*cs_A;
217
218 gamma23 = g23->real;
219 sigma23 = g23->imag;
220
221 is_ident23 = ( gamma23 == one && sigma23 == zero );
222
223 if ( !is_ident23 )
225 &gamma23,
226 &sigma23,
227 a2, rs_A,
228 a3, rs_A );
229 }
230 }
231
232 // Pipeline stage
233
234 for ( j = k_minus_1; j < nG; ++j )
235 {
236 nG_app = k_G;
237 n_iter = nG_app / n_fuse;
238 n_left = nG_app % n_fuse;
239
240 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
241 {
242 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
243 g23 = buff_G + (g )*rs_G + (k )*cs_G;
244 a1 = buff_A + (g - 1)*cs_A;
245 a2 = buff_A + (g )*cs_A;
246 a3 = buff_A + (g + 1)*cs_A;
247
248 gamma12 = g12->real;
249 sigma12 = g12->imag;
250 gamma23 = g23->real;
251 sigma23 = g23->imag;
252
253 is_ident12 = ( gamma12 == one && sigma12 == zero );
254 is_ident23 = ( gamma23 == one && sigma23 == zero );
255
256 if ( !is_ident12 && is_ident23 )
257 {
258 // Apply only to columns 1 and 2.
259
261 &gamma12,
262 &sigma12,
263 a1, rs_A,
264 a2, rs_A );
265 }
266 else if ( is_ident12 && !is_ident23 )
267 {
268 // Apply only to columns 2 and 3.
269
271 &gamma23,
272 &sigma23,
273 a2, rs_A,
274 a3, rs_A );
275 }
276 else if ( !is_ident12 && !is_ident23 )
277 {
278 // Apply to all three columns.
279
281 &gamma12,
282 &sigma12,
283 &gamma23,
284 &sigma23,
285 a1, rs_A,
286 a2, rs_A,
287 a3, rs_A );
288 }
289 }
290 //for ( k = 0; k < n_left; k += 1, g -= 1 )
291 if ( n_left == 1 )
292 {
293 g23 = buff_G + (g )*rs_G + (k )*cs_G;
294 a2 = buff_A + (g )*cs_A;
295 a3 = buff_A + (g + 1)*cs_A;
296
297 gamma23 = g23->real;
298 sigma23 = g23->imag;
299
300 is_ident23 = ( gamma23 == one && sigma23 == zero );
301
302 if ( !is_ident23 )
304 &gamma23,
305 &sigma23,
306 a2, rs_A,
307 a3, rs_A );
308 }
309 }
310
311 // Shutdown stage
312
313 for ( j = 1; j < k_G; ++j )
314 {
315 nG_app = k_G - j;
316 n_iter = nG_app / n_fuse;
317 n_left = nG_app % n_fuse;
318
319 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
320 {
321 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
322 g23 = buff_G + (g )*rs_G + (k )*cs_G;
323 a1 = buff_A + (g - 1)*cs_A;
324 a2 = buff_A + (g )*cs_A;
325 a3 = buff_A + (g + 1)*cs_A;
326
327 gamma12 = g12->real;
328 sigma12 = g12->imag;
329 gamma23 = g23->real;
330 sigma23 = g23->imag;
331
332 is_ident12 = ( gamma12 == one && sigma12 == zero );
333 is_ident23 = ( gamma23 == one && sigma23 == zero );
334
335 if ( !is_ident12 && is_ident23 )
336 {
337 // Apply only to columns 1 and 2.
338
340 &gamma12,
341 &sigma12,
342 a1, rs_A,
343 a2, rs_A );
344 }
345 else if ( is_ident12 && !is_ident23 )
346 {
347 // Apply only to columns 2 and 3.
348
350 &gamma23,
351 &sigma23,
352 a2, rs_A,
353 a3, rs_A );
354 }
355 else if ( !is_ident12 && !is_ident23 )
356 {
357 // Apply to all three columns.
358
360 &gamma12,
361 &sigma12,
362 &gamma23,
363 &sigma23,
364 a1, rs_A,
365 a2, rs_A,
366 a3, rs_A );
367 }
368 }
369 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
370 if ( n_left == 1 )
371 {
372 g23 = buff_G + (g )*rs_G + (k )*cs_G;
373 a2 = buff_A + (g )*cs_A;
374 a3 = buff_A + (g + 1)*cs_A;
375
376 gamma23 = g23->real;
377 sigma23 = g23->imag;
378
379 is_ident23 = ( gamma23 == one && sigma23 == zero );
380
381 if ( !is_ident23 )
383 &gamma23,
384 &sigma23,
385 a2, rs_A,
386 a3, rs_A );
387 }
388 }
389
390 return FLA_SUCCESS;
391}
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opt_var6()

FLA_Error FLA_Apply_G_rf_opt_var6 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
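44 rs_G = FLA_Obj_row_stride( G );
45 cs_G = FLA_Obj_col_stride( G );
46
47 rs_A = FLA_Obj_row_stride( A );
48 cs_A = FLA_Obj_col_stride( A );
49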
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_opc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:674
FLA_Error FLA_Apply_G_rf_opd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:393
FLA_Error FLA_Apply_G_rf_ops_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:112
FLA_Error FLA_Apply_G_rf_opz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var6.c:955
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49

References FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_opz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_opz_var6()

FLA_Error FLA_Apply_G_rf_opz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *buff_G,
int  rs_G,
int  cs_G,
dcomplex *buff_A,
int  rs_A,
int  cs_A 
)
960{
961 double one = bl1_d1();
962 double zero = bl1_d0();
963 double gamma12;
964 double sigma12;
965 double gamma23;
966 double sigma23;
967 dcomplex* a1;
968 dcomplex* a2;
969 dcomplex* a3;
970 dcomplex* g12;
971 dcomplex* g23;
972 int i, j, g, k;
973 int nG, nG_app;
974 int n_iter;
975 int n_left;
976 int k_minus_1;
977 int n_fuse;
979
980 k_minus_1 = k_G - 1;
981 nG = n_A - 1;
982 n_fuse = 2;
983
984 // Use the simple variant for nG < (k - 1) or k == 1.
985 if ( nG < k_minus_1 || k_G == 1 )
986 {
987 FLA_Apply_G_rf_opz_var1( k_G,
988 m_A,
989 n_A,
990 buff_G, rs_G, cs_G,
991 buff_A, rs_A, cs_A );
992 return FLA_SUCCESS;
993 }
994
995
996 // Start-up phase.
997
998 for ( j = 0; j < k_minus_1; ++j )
999 {
1000 nG_app = j + 1;
1001 n_iter = nG_app / n_fuse;
1002 n_left = nG_app % n_fuse;
1003
1004 for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1005 {
1006 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1007 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1008 a1 = buff_A + (g - 1)*cs_A;
1009 a2 = buff_A + (g )*cs_A;
1010 a3 = buff_A + (g + 1)*cs_A;
1011
1012 gamma12 = g12->real;
1013 sigma12 = g12->imag;
1014 gamma23 = g23->real;
1015 sigma23 = g23->imag;
1016
1017 is_ident12 = ( gamma12 == one && sigma12 == zero );
1018 is_ident23 = ( gamma23 == one && sigma23 == zero );
1019
1020 if ( !is_ident12 && is_ident23 )
1021 {
1022 // Apply only to columns 1 and 2.
1023
1025 &gamma12,
1026 &sigma12,
1027 a1, rs_A,
1028 a2, rs_A );
1029 }
1030 else if ( is_ident12 && !is_ident23 )
1031 {
1032 // Apply only to columns 2 and 3.
1033
1035 &gamma23,
1036 &sigma23,
1037 a2, rs_A,
1038 a3, rs_A );
1039 }
1040 else if ( !is_ident12 && !is_ident23 )
1041 {
1042 // Apply to all three columns.
1043
1045 &gamma12,
1046 &sigma12,
1047 &gamma23,
1048 &sigma23,
1049 a1, rs_A,
1050 a2, rs_A,
1051 a3, rs_A );
1052 }
1053 }
1054 //for ( k = 0; k < n_left; k += 1, g -= 1 )
1055 if ( n_left == 1 )
1056 {
1057 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1058 a2 = buff_A + (g )*cs_A;
1059 a3 = buff_A + (g + 1)*cs_A;
1060
1061 gamma23 = g23->real;
1062 sigma23 = g23->imag;
1063
1064 is_ident23 = ( gamma23 == one && sigma23 == zero );
1065
1066 if ( !is_ident23 )
1068 &gamma23,
1069 &sigma23,
1070 a2, rs_A,
1071 a3, rs_A );
1072 }
1073 }
1074
1075 // Pipeline stage
1076
1077 for ( j = k_minus_1; j < nG; ++j )
1078 {
1079 nG_app = k_G;
1080 n_iter = nG_app / n_fuse;
1081 n_left = nG_app % n_fuse;
1082
1083 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1084 {
1085 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1086 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1087 a1 = buff_A + (g - 1)*cs_A;
1088 a2 = buff_A + (g )*cs_A;
1089 a3 = buff_A + (g + 1)*cs_A;
1090
1091 gamma12 = g12->real;
1092 sigma12 = g12->imag;
1093 gamma23 = g23->real;
1094 sigma23 = g23->imag;
1095
1096 is_ident12 = ( gamma12 == one && sigma12 == zero );
1097 is_ident23 = ( gamma23 == one && sigma23 == zero );
1098
1099 if ( !is_ident12 && is_ident23 )
1100 {
1101 // Apply only to columns 1 and 2.
1102
1104 &gamma12,
1105 &sigma12,
1106 a1, rs_A,
1107 a2, rs_A );
1108 }
1109 else if ( is_ident12 && !is_ident23 )
1110 {
1111 // Apply only to columns 2 and 3.
1112
1114 &gamma23,
1115 &sigma23,
1116 a2, rs_A,
1117 a3, rs_A );
1118 }
1119 else if ( !is_ident12 && !is_ident23 )
1120 {
1121 // Apply to all three columns.
1122
1124 &gamma12,
1125 &sigma12,
1126 &gamma23,
1127 &sigma23,
1128 a1, rs_A,
1129 a2, rs_A,
1130 a3, rs_A );
1131 }
1132 }
1133 //for ( k = 0; k < n_left; k += 1, g -= 1 )
1134 if ( n_left == 1 )
1135 {
1136 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1137 a2 = buff_A + (g )*cs_A;
1138 a3 = buff_A + (g + 1)*cs_A;
1139
1140 gamma23 = g23->real;
1141 sigma23 = g23->imag;
1142
1143 is_ident23 = ( gamma23 == one && sigma23 == zero );
1144
1145 if ( !is_ident23 )
1147 &gamma23,
1148 &sigma23,
1149 a2, rs_A,
1150 a3, rs_A );
1151 }
1152 }
1153
1154 // Shutdown stage
1155
1156 for ( j = 1; j < k_G; ++j )
1157 {
1158 nG_app = k_G - j;
1159 n_iter = nG_app / n_fuse;
1160 n_left = nG_app % n_fuse;
1161
1162 for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1163 {
1164 g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1165 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1166 a1 = buff_A + (g - 1)*cs_A;
1167 a2 = buff_A + (g )*cs_A;
1168 a3 = buff_A + (g + 1)*cs_A;
1169
1170 gamma12 = g12->real;
1171 sigma12 = g12->imag;
1172 gamma23 = g23->real;
1173 sigma23 = g23->imag;
1174
1175 is_ident12 = ( gamma12 == one && sigma12 == zero );
1176 is_ident23 = ( gamma23 == one && sigma23 == zero );
1177
1178 if ( !is_ident12 && is_ident23 )
1179 {
1180 // Apply only to columns 1 and 2.
1181
1183 &gamma12,
1184 &sigma12,
1185 a1, rs_A,
1186 a2, rs_A );
1187 }
1188 else if ( is_ident12 && !is_ident23 )
1189 {
1190 // Apply only to columns 2 and 3.
1191
1193 &gamma23,
1194 &sigma23,
1195 a2, rs_A,
1196 a3, rs_A );
1197 }
1198 else if ( !is_ident12 && !is_ident23 )
1199 {
1200 // Apply to all three columns.
1201
1203 &gamma12,
1204 &sigma12,
1205 &gamma23,
1206 &sigma23,
1207 a1, rs_A,
1208 a2, rs_A,
1209 a3, rs_A );
1210 }
1211 }
1212 //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
1213 if ( n_left == 1 )
1214 {
1215 g23 = buff_G + (g )*rs_G + (k )*cs_G;
1216 a2 = buff_A + (g )*cs_A;
1217 a3 = buff_A + (g + 1)*cs_A;
1218
1219 gamma23 = g23->real;
1220 sigma23 = g23->imag;
1221
1222 is_ident23 = ( gamma23 == one && sigma23 == zero );
1223
1224 if ( !is_ident23 )
1226 &gamma23,
1227 &sigma23,
1228 a2, rs_A,
1229 a3, rs_A );
1230 }
1231 }
1232
1233 return FLA_SUCCESS;
1234}
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_opt_var6().