libflame revision_anchor
Functions
FLA_Apply_G_rf_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_opt_var3 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_opc_var3()

FLA_Error FLA_Apply_G_rf_opc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
1023{
1024 float one = bl1_s1();
1025 float zero = bl1_s0();
1026 float gamma23_k1;
1027 float sigma23_k1;
1028 float gamma34_k1;
1029 float sigma34_k1;
1030 float gamma12_k2;
1031 float sigma12_k2;
1032 float gamma23_k2;
1033 float sigma23_k2;
1034 scomplex* a1;
1035 scomplex* a2;
1036 scomplex* a3;
1037 scomplex* a4;
1042 int i, j, g, k;
1043 int nG, nG_app;
1044 int n_iter;
1045 int n_left;
1046 int k_minus_1;
1047 int n_fuse;
1048 int k_fuse;
1051 int has_ident;
1052
1053 k_minus_1 = k_G - 1;
1054 nG = n_A - 1;
1055 n_fuse = 2;
1056 k_fuse = 2;
1057
1058 // Use the simple variant for nG < 2*(k - 1) or k == 1.
1059 if ( nG < 2*k_minus_1 || k_G == 1 )
1060 {
1062 m_A,
1063 n_A,
1064 buff_G, rs_G, cs_G,
1065 buff_A, rs_A, cs_A );
1066 return FLA_SUCCESS;
1067 }
1068
1069
1070 // Start-up phase.
1071
1072 for ( j = -1; j < k_minus_1; j += n_fuse )
1073 {
1074 nG_app = j + 2;
1075 n_iter = nG_app / k_fuse;
1076 n_left = 1;
1077
1078 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1079 {
1080 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1081 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1082 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1083 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1084 a1 = buff_A + (g - 1)*cs_A;
1085 a2 = buff_A + (g )*cs_A;
1086 a3 = buff_A + (g + 1)*cs_A;
1087 a4 = buff_A + (g + 2)*cs_A;
1088
1089 gamma23_k1 = g23_k1->real;
1090 sigma23_k1 = g23_k1->imag;
1091 gamma34_k1 = g34_k1->real;
1092 sigma34_k1 = g34_k1->imag;
1093 gamma12_k2 = g12_k2->real;
1094 sigma12_k2 = g12_k2->imag;
1095 gamma23_k2 = g23_k2->real;
1096 sigma23_k2 = g23_k2->imag;
1097
1104
1105 if ( has_ident )
1106 {
1107 // Apply to pairs of columns as needed.
1108
1109 if ( !is_ident23_k1 )
1111 &gamma23_k1,
1112 &sigma23_k1,
1113 a2, rs_A,
1114 a3, rs_A );
1115
1116 if ( !is_ident34_k1 )
1118 &gamma34_k1,
1119 &sigma34_k1,
1120 a3, rs_A,
1121 a4, rs_A );
1122
1123 if ( !is_ident12_k2 )
1125 &gamma12_k2,
1126 &sigma12_k2,
1127 a1, rs_A,
1128 a2, rs_A );
1129
1130 if ( !is_ident23_k2 )
1132 &gamma23_k2,
1133 &sigma23_k2,
1134 a2, rs_A,
1135 a3, rs_A );
1136 }
1137 else
1138 {
1139 // Apply to all four columns.
1140
1142 &gamma23_k1,
1143 &sigma23_k1,
1144 &gamma34_k1,
1145 &sigma34_k1,
1146 &gamma12_k2,
1147 &sigma12_k2,
1148 &gamma23_k2,
1149 &sigma23_k2,
1150 a1, rs_A,
1151 a2, rs_A,
1152 a3, rs_A,
1153 a4, rs_A );
1154 }
1155 }
1156
1157 if ( n_left == 1 )
1158 {
1159 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1160 a3 = buff_A + (g + 1)*cs_A;
1161 a4 = buff_A + (g + 2)*cs_A;
1162
1163 gamma34_k1 = g34_k1->real;
1164 sigma34_k1 = g34_k1->imag;
1165
1167
1168 if ( !is_ident34_k1 )
1170 &gamma34_k1,
1171 &sigma34_k1,
1172 a3, rs_A,
1173 a4, rs_A );
1174 }
1175 }
1176
1177 // Pipeline stage
1178
1179 for ( ; j < nG - 1; j += n_fuse )
1180 {
1181 nG_app = k_G;
1182 n_iter = nG_app / k_fuse;
1183 n_left = nG_app % k_fuse;
1184
1185 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1186 {
1187 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1188 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1189 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1190 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1191 a1 = buff_A + (g - 1)*cs_A;
1192 a2 = buff_A + (g )*cs_A;
1193 a3 = buff_A + (g + 1)*cs_A;
1194 a4 = buff_A + (g + 2)*cs_A;
1195
1196 gamma23_k1 = g23_k1->real;
1197 sigma23_k1 = g23_k1->imag;
1198 gamma34_k1 = g34_k1->real;
1199 sigma34_k1 = g34_k1->imag;
1200 gamma12_k2 = g12_k2->real;
1201 sigma12_k2 = g12_k2->imag;
1202 gamma23_k2 = g23_k2->real;
1203 sigma23_k2 = g23_k2->imag;
1204
1211
1212 if ( has_ident )
1213 {
1214 // Apply to pairs of columns as needed.
1215
1216 if ( !is_ident23_k1 )
1218 &gamma23_k1,
1219 &sigma23_k1,
1220 a2, rs_A,
1221 a3, rs_A );
1222
1223 if ( !is_ident34_k1 )
1225 &gamma34_k1,
1226 &sigma34_k1,
1227 a3, rs_A,
1228 a4, rs_A );
1229
1230 if ( !is_ident12_k2 )
1232 &gamma12_k2,
1233 &sigma12_k2,
1234 a1, rs_A,
1235 a2, rs_A );
1236
1237 if ( !is_ident23_k2 )
1239 &gamma23_k2,
1240 &sigma23_k2,
1241 a2, rs_A,
1242 a3, rs_A );
1243 }
1244 else
1245 {
1246 // Apply to all four columns.
1247
1249 &gamma23_k1,
1250 &sigma23_k1,
1251 &gamma34_k1,
1252 &sigma34_k1,
1253 &gamma12_k2,
1254 &sigma12_k2,
1255 &gamma23_k2,
1256 &sigma23_k2,
1257 a1, rs_A,
1258 a2, rs_A,
1259 a3, rs_A,
1260 a4, rs_A );
1261 }
1262 }
1263
1264 if ( n_left == 1 )
1265 {
1266 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1267 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1268 a2 = buff_A + (g )*cs_A;
1269 a3 = buff_A + (g + 1)*cs_A;
1270 a4 = buff_A + (g + 2)*cs_A;
1271
1272 gamma23_k1 = g23_k1->real;
1273 sigma23_k1 = g23_k1->imag;
1274 gamma34_k1 = g34_k1->real;
1275 sigma34_k1 = g34_k1->imag;
1276
1279
1280 if ( !is_ident23_k1 && is_ident34_k1 )
1281 {
1283 &gamma23_k1,
1284 &sigma23_k1,
1285 a2, rs_A,
1286 a3, rs_A );
1287 }
1288 else if ( is_ident23_k1 && !is_ident34_k1 )
1289 {
1291 &gamma34_k1,
1292 &sigma34_k1,
1293 a3, rs_A,
1294 a4, rs_A );
1295 }
1296 else
1297 {
1299 &gamma23_k1,
1300 &sigma23_k1,
1301 &gamma34_k1,
1302 &sigma34_k1,
1303 a2, rs_A,
1304 a3, rs_A,
1305 a4, rs_A );
1306 }
1307 }
1308 }
1309
1310 // Shutdown stage
1311
1312 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1313 {
1314 g = nG - 1;
1315 k = j;
1316
1317 //n_left = 1;
1318 //if ( n_left == 1 )
1319 {
1320 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1321 a2 = buff_A + (g )*cs_A;
1322 a3 = buff_A + (g + 1)*cs_A;
1323
1324 gamma23_k1 = g23_k1->real;
1325 sigma23_k1 = g23_k1->imag;
1326
1328
1329 if ( !is_ident23_k1 )
1331 &gamma23_k1,
1332 &sigma23_k1,
1333 a2, rs_A,
1334 a3, rs_A );
1335 ++k;
1336 --g;
1337 }
1338
1339 nG_app = k_minus_1 - j;
1340 n_iter = nG_app / k_fuse;
1341 n_left = nG_app % k_fuse;
1342
1343 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1344 {
1345 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1346 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1347 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1348 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1349 a1 = buff_A + (g - 1)*cs_A;
1350 a2 = buff_A + (g )*cs_A;
1351 a3 = buff_A + (g + 1)*cs_A;
1352 a4 = buff_A + (g + 2)*cs_A;
1353
1354 gamma23_k1 = g23_k1->real;
1355 sigma23_k1 = g23_k1->imag;
1356 gamma34_k1 = g34_k1->real;
1357 sigma34_k1 = g34_k1->imag;
1358 gamma12_k2 = g12_k2->real;
1359 sigma12_k2 = g12_k2->imag;
1360 gamma23_k2 = g23_k2->real;
1361 sigma23_k2 = g23_k2->imag;
1362
1369
1370 if ( has_ident )
1371 {
1372 // Apply to pairs of columns as needed.
1373
1374 if ( !is_ident23_k1 )
1376 &gamma23_k1,
1377 &sigma23_k1,
1378 a2, rs_A,
1379 a3, rs_A );
1380
1381 if ( !is_ident34_k1 )
1383 &gamma34_k1,
1384 &sigma34_k1,
1385 a3, rs_A,
1386 a4, rs_A );
1387
1388 if ( !is_ident12_k2 )
1390 &gamma12_k2,
1391 &sigma12_k2,
1392 a1, rs_A,
1393 a2, rs_A );
1394
1395 if ( !is_ident23_k2 )
1397 &gamma23_k2,
1398 &sigma23_k2,
1399 a2, rs_A,
1400 a3, rs_A );
1401 }
1402 else
1403 {
1404 // Apply to all four columns.
1405
1407 &gamma23_k1,
1408 &sigma23_k1,
1409 &gamma34_k1,
1410 &sigma34_k1,
1411 &gamma12_k2,
1412 &sigma12_k2,
1413 &gamma23_k2,
1414 &sigma23_k2,
1415 a1, rs_A,
1416 a2, rs_A,
1417 a3, rs_A,
1418 a4, rs_A );
1419 }
1420 }
1421
1422 if ( n_left == 1 )
1423 {
1424 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1425 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1426 a2 = buff_A + (g )*cs_A;
1427 a3 = buff_A + (g + 1)*cs_A;
1428 a4 = buff_A + (g + 2)*cs_A;
1429
1430 gamma23_k1 = g23_k1->real;
1431 sigma23_k1 = g23_k1->imag;
1432 gamma34_k1 = g34_k1->real;
1433 sigma34_k1 = g34_k1->imag;
1434
1437
1438 if ( !is_ident23_k1 && is_ident34_k1 )
1439 {
1441 &gamma23_k1,
1442 &sigma23_k1,
1443 a2, rs_A,
1444 a3, rs_A );
1445 }
1446 else if ( is_ident23_k1 && !is_ident34_k1 )
1447 {
1449 &gamma34_k1,
1450 &sigma34_k1,
1451 a3, rs_A,
1452 a4, rs_A );
1453 }
1454 else
1455 {
1457 &gamma23_k1,
1458 &sigma23_k1,
1459 &gamma34_k1,
1460 &sigma34_k1,
1461 a2, rs_A,
1462 a3, rs_A,
1463 a4, rs_A );
1464 }
1465 }
1466 }
1467
1468 return FLA_SUCCESS;
1469}
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:215
int n_left
Definition bl1_axmyv2.c:149
int i
Definition bl1_axmyv2.c:145
float bl1_s0(void)
Definition bl1_constants.c:111
float bl1_s1(void)
Definition bl1_constants.c:47
Definition blis_type_defs.h:133

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_blc_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_opd_var3()

FLA_Error FLA_Apply_G_rf_opd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
570{
571 double one = bl1_d1();
572 double zero = bl1_d0();
573 double gamma23_k1;
574 double sigma23_k1;
575 double gamma34_k1;
576 double sigma34_k1;
577 double gamma12_k2;
578 double sigma12_k2;
579 double gamma23_k2;
580 double sigma23_k2;
581 double* a1;
582 double* a2;
583 double* a3;
584 double* a4;
589 int i, j, g, k;
590 int nG, nG_app;
591 int n_iter;
592 int n_left;
593 int k_minus_1;
594 int n_fuse;
595 int k_fuse;
598 int has_ident;
599
600 k_minus_1 = k_G - 1;
601 nG = n_A - 1;
602 n_fuse = 2;
603 k_fuse = 2;
604
605 // Use the simple variant for nG < 2*(k - 1) or k == 1.
606 if ( nG < 2*k_minus_1 || k_G == 1 )
607 {
609 m_A,
610 n_A,
611 buff_G, rs_G, cs_G,
612 buff_A, rs_A, cs_A );
613 return FLA_SUCCESS;
614 }
615
616
617 // Start-up phase.
618
619 for ( j = -1; j < k_minus_1; j += n_fuse )
620 {
621 nG_app = j + 2;
622 n_iter = nG_app / k_fuse;
623 n_left = 1;
624
625 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
626 {
627 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
628 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
629 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
630 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
631 a1 = buff_A + (g - 1)*cs_A;
632 a2 = buff_A + (g )*cs_A;
633 a3 = buff_A + (g + 1)*cs_A;
634 a4 = buff_A + (g + 2)*cs_A;
635
636 gamma23_k1 = g23_k1->real;
637 sigma23_k1 = g23_k1->imag;
638 gamma34_k1 = g34_k1->real;
639 sigma34_k1 = g34_k1->imag;
640 gamma12_k2 = g12_k2->real;
641 sigma12_k2 = g12_k2->imag;
642 gamma23_k2 = g23_k2->real;
643 sigma23_k2 = g23_k2->imag;
644
651
652 if ( has_ident )
653 {
654 // Apply to pairs of columns as needed.
655
656 if ( !is_ident23_k1 )
658 &gamma23_k1,
659 &sigma23_k1,
660 a2, rs_A,
661 a3, rs_A );
662
663 if ( !is_ident34_k1 )
665 &gamma34_k1,
666 &sigma34_k1,
667 a3, rs_A,
668 a4, rs_A );
669
670 if ( !is_ident12_k2 )
672 &gamma12_k2,
673 &sigma12_k2,
674 a1, rs_A,
675 a2, rs_A );
676
677 if ( !is_ident23_k2 )
679 &gamma23_k2,
680 &sigma23_k2,
681 a2, rs_A,
682 a3, rs_A );
683 }
684 else
685 {
686 // Apply to all four columns.
687
689 &gamma23_k1,
690 &sigma23_k1,
691 &gamma34_k1,
692 &sigma34_k1,
693 &gamma12_k2,
694 &sigma12_k2,
695 &gamma23_k2,
696 &sigma23_k2,
697 a1, rs_A,
698 a2, rs_A,
699 a3, rs_A,
700 a4, rs_A );
701 }
702 }
703
704 if ( n_left == 1 )
705 {
706 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
707 a3 = buff_A + (g + 1)*cs_A;
708 a4 = buff_A + (g + 2)*cs_A;
709
710 gamma34_k1 = g34_k1->real;
711 sigma34_k1 = g34_k1->imag;
712
714
715 if ( !is_ident34_k1 )
717 &gamma34_k1,
718 &sigma34_k1,
719 a3, rs_A,
720 a4, rs_A );
721 }
722 }
723
724 // Pipeline stage
725
726 for ( ; j < nG - 1; j += n_fuse )
727 {
728 nG_app = k_G;
729 n_iter = nG_app / k_fuse;
730 n_left = nG_app % k_fuse;
731
732 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
733 {
734 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
735 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
736 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
737 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
738 a1 = buff_A + (g - 1)*cs_A;
739 a2 = buff_A + (g )*cs_A;
740 a3 = buff_A + (g + 1)*cs_A;
741 a4 = buff_A + (g + 2)*cs_A;
742
743 gamma23_k1 = g23_k1->real;
744 sigma23_k1 = g23_k1->imag;
745 gamma34_k1 = g34_k1->real;
746 sigma34_k1 = g34_k1->imag;
747 gamma12_k2 = g12_k2->real;
748 sigma12_k2 = g12_k2->imag;
749 gamma23_k2 = g23_k2->real;
750 sigma23_k2 = g23_k2->imag;
751
758
759 if ( has_ident )
760 {
761 // Apply to pairs of columns as needed.
762
763 if ( !is_ident23_k1 )
765 &gamma23_k1,
766 &sigma23_k1,
767 a2, rs_A,
768 a3, rs_A );
769
770 if ( !is_ident34_k1 )
772 &gamma34_k1,
773 &sigma34_k1,
774 a3, rs_A,
775 a4, rs_A );
776
777 if ( !is_ident12_k2 )
779 &gamma12_k2,
780 &sigma12_k2,
781 a1, rs_A,
782 a2, rs_A );
783
784 if ( !is_ident23_k2 )
786 &gamma23_k2,
787 &sigma23_k2,
788 a2, rs_A,
789 a3, rs_A );
790 }
791 else
792 {
793 // Apply to all four columns.
794
796 &gamma23_k1,
797 &sigma23_k1,
798 &gamma34_k1,
799 &sigma34_k1,
800 &gamma12_k2,
801 &sigma12_k2,
802 &gamma23_k2,
803 &sigma23_k2,
804 a1, rs_A,
805 a2, rs_A,
806 a3, rs_A,
807 a4, rs_A );
808 }
809 }
810
811 if ( n_left == 1 )
812 {
813 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
814 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
815 a2 = buff_A + (g )*cs_A;
816 a3 = buff_A + (g + 1)*cs_A;
817 a4 = buff_A + (g + 2)*cs_A;
818
819 gamma23_k1 = g23_k1->real;
820 sigma23_k1 = g23_k1->imag;
821 gamma34_k1 = g34_k1->real;
822 sigma34_k1 = g34_k1->imag;
823
826
828 {
830 &gamma23_k1,
831 &sigma23_k1,
832 a2, rs_A,
833 a3, rs_A );
834 }
835 else if ( is_ident23_k1 && !is_ident34_k1 )
836 {
838 &gamma34_k1,
839 &sigma34_k1,
840 a3, rs_A,
841 a4, rs_A );
842 }
843 else
844 {
846 &gamma23_k1,
847 &sigma23_k1,
848 &gamma34_k1,
849 &sigma34_k1,
850 a2, rs_A,
851 a3, rs_A,
852 a4, rs_A );
853 }
854 }
855 }
856
857 // Shutdown stage
858
859 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
860 {
861 g = nG - 1;
862 k = j;
863
864 //n_left = 1;
865 //if ( n_left == 1 )
866 {
867 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
868 a2 = buff_A + (g )*cs_A;
869 a3 = buff_A + (g + 1)*cs_A;
870
871 gamma23_k1 = g23_k1->real;
872 sigma23_k1 = g23_k1->imag;
873
875
876 if ( !is_ident23_k1 )
878 &gamma23_k1,
879 &sigma23_k1,
880 a2, rs_A,
881 a3, rs_A );
882 ++k;
883 --g;
884 }
885
886 nG_app = k_minus_1 - j;
887 n_iter = nG_app / k_fuse;
888 n_left = nG_app % k_fuse;
889
890 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
891 {
892 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
893 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
894 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
895 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
896 a1 = buff_A + (g - 1)*cs_A;
897 a2 = buff_A + (g )*cs_A;
898 a3 = buff_A + (g + 1)*cs_A;
899 a4 = buff_A + (g + 2)*cs_A;
900
901 gamma23_k1 = g23_k1->real;
902 sigma23_k1 = g23_k1->imag;
903 gamma34_k1 = g34_k1->real;
904 sigma34_k1 = g34_k1->imag;
905 gamma12_k2 = g12_k2->real;
906 sigma12_k2 = g12_k2->imag;
907 gamma23_k2 = g23_k2->real;
908 sigma23_k2 = g23_k2->imag;
909
916
917 if ( has_ident )
918 {
919 // Apply to pairs of columns as needed.
920
921 if ( !is_ident23_k1 )
923 &gamma23_k1,
924 &sigma23_k1,
925 a2, rs_A,
926 a3, rs_A );
927
928 if ( !is_ident34_k1 )
930 &gamma34_k1,
931 &sigma34_k1,
932 a3, rs_A,
933 a4, rs_A );
934
935 if ( !is_ident12_k2 )
937 &gamma12_k2,
938 &sigma12_k2,
939 a1, rs_A,
940 a2, rs_A );
941
942 if ( !is_ident23_k2 )
944 &gamma23_k2,
945 &sigma23_k2,
946 a2, rs_A,
947 a3, rs_A );
948 }
949 else
950 {
951 // Apply to all four columns.
952
954 &gamma23_k1,
955 &sigma23_k1,
956 &gamma34_k1,
957 &sigma34_k1,
958 &gamma12_k2,
959 &sigma12_k2,
960 &gamma23_k2,
961 &sigma23_k2,
962 a1, rs_A,
963 a2, rs_A,
964 a3, rs_A,
965 a4, rs_A );
966 }
967 }
968
969 if ( n_left == 1 )
970 {
971 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
972 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
973 a2 = buff_A + (g )*cs_A;
974 a3 = buff_A + (g + 1)*cs_A;
975 a4 = buff_A + (g + 2)*cs_A;
976
977 gamma23_k1 = g23_k1->real;
978 sigma23_k1 = g23_k1->imag;
979 gamma34_k1 = g34_k1->real;
980 sigma34_k1 = g34_k1->imag;
981
984
986 {
988 &gamma23_k1,
989 &sigma23_k1,
990 a2, rs_A,
991 a3, rs_A );
992 }
993 else if ( is_ident23_k1 && !is_ident34_k1 )
994 {
996 &gamma34_k1,
997 &sigma34_k1,
998 a3, rs_A,
999 a4, rs_A );
1000 }
1001 else
1002 {
1004 &gamma23_k1,
1005 &sigma23_k1,
1006 &gamma34_k1,
1007 &sigma34_k1,
1008 a2, rs_A,
1009 a3, rs_A,
1010 a4, rs_A );
1011 }
1012 }
1013 }
1014
1015 return FLA_SUCCESS;
1016}
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:164
double bl1_d0(void)
Definition bl1_constants.c:118
double bl1_d1(void)
Definition bl1_constants.c:54
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_bld_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_ops_var3()

FLA_Error FLA_Apply_G_rf_ops_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
117{
118 float one = bl1_s1();
119 float zero = bl1_s0();
120 float gamma23_k1;
121 float sigma23_k1;
122 float gamma34_k1;
123 float sigma34_k1;
124 float gamma12_k2;
125 float sigma12_k2;
126 float gamma23_k2;
127 float sigma23_k2;
128 float* a1;
129 float* a2;
130 float* a3;
131 float* a4;
136 int i, j, g, k;
137 int nG, nG_app;
138 int n_iter;
139 int n_left;
140 int k_minus_1;
141 int n_fuse;
142 int k_fuse;
145 int has_ident;
146
147 k_minus_1 = k_G - 1;
148 nG = n_A - 1;
149 n_fuse = 2;
150 k_fuse = 2;
151
152 // Use the simple variant for nG < 2*(k - 1) or k == 1.
153 if ( nG < 2*k_minus_1 || k_G == 1 )
154 {
156 m_A,
157 n_A,
158 buff_G, rs_G, cs_G,
159 buff_A, rs_A, cs_A );
160 return FLA_SUCCESS;
161 }
162
163
164 // Start-up phase.
165
166 for ( j = -1; j < k_minus_1; j += n_fuse )
167 {
168 nG_app = j + 2;
169 n_iter = nG_app / k_fuse;
170 n_left = 1;
171
172 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
173 {
174 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
175 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
176 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
177 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
178 a1 = buff_A + (g - 1)*cs_A;
179 a2 = buff_A + (g )*cs_A;
180 a3 = buff_A + (g + 1)*cs_A;
181 a4 = buff_A + (g + 2)*cs_A;
182
183 gamma23_k1 = g23_k1->real;
184 sigma23_k1 = g23_k1->imag;
185 gamma34_k1 = g34_k1->real;
186 sigma34_k1 = g34_k1->imag;
187 gamma12_k2 = g12_k2->real;
188 sigma12_k2 = g12_k2->imag;
189 gamma23_k2 = g23_k2->real;
190 sigma23_k2 = g23_k2->imag;
191
198
199 if ( has_ident )
200 {
201 // Apply to pairs of columns as needed.
202
203 if ( !is_ident23_k1 )
205 &gamma23_k1,
206 &sigma23_k1,
207 a2, rs_A,
208 a3, rs_A );
209
210 if ( !is_ident34_k1 )
212 &gamma34_k1,
213 &sigma34_k1,
214 a3, rs_A,
215 a4, rs_A );
216
217 if ( !is_ident12_k2 )
219 &gamma12_k2,
220 &sigma12_k2,
221 a1, rs_A,
222 a2, rs_A );
223
224 if ( !is_ident23_k2 )
226 &gamma23_k2,
227 &sigma23_k2,
228 a2, rs_A,
229 a3, rs_A );
230 }
231 else
232 {
233 // Apply to all four columns.
234
236 &gamma23_k1,
237 &sigma23_k1,
238 &gamma34_k1,
239 &sigma34_k1,
240 &gamma12_k2,
241 &sigma12_k2,
242 &gamma23_k2,
243 &sigma23_k2,
244 a1, rs_A,
245 a2, rs_A,
246 a3, rs_A,
247 a4, rs_A );
248 }
249 }
250
251 if ( n_left == 1 )
252 {
253 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
254 a3 = buff_A + (g + 1)*cs_A;
255 a4 = buff_A + (g + 2)*cs_A;
256
257 gamma34_k1 = g34_k1->real;
258 sigma34_k1 = g34_k1->imag;
259
261
262 if ( !is_ident34_k1 )
264 &gamma34_k1,
265 &sigma34_k1,
266 a3, rs_A,
267 a4, rs_A );
268 }
269 }
270
271 // Pipeline stage
272
273 for ( ; j < nG - 1; j += n_fuse )
274 {
275 nG_app = k_G;
276 n_iter = nG_app / k_fuse;
277 n_left = nG_app % k_fuse;
278
279 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
280 {
281 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
282 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
283 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
284 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
285 a1 = buff_A + (g - 1)*cs_A;
286 a2 = buff_A + (g )*cs_A;
287 a3 = buff_A + (g + 1)*cs_A;
288 a4 = buff_A + (g + 2)*cs_A;
289
290 gamma23_k1 = g23_k1->real;
291 sigma23_k1 = g23_k1->imag;
292 gamma34_k1 = g34_k1->real;
293 sigma34_k1 = g34_k1->imag;
294 gamma12_k2 = g12_k2->real;
295 sigma12_k2 = g12_k2->imag;
296 gamma23_k2 = g23_k2->real;
297 sigma23_k2 = g23_k2->imag;
298
305
306 if ( has_ident )
307 {
308 // Apply to pairs of columns as needed.
309
310 if ( !is_ident23_k1 )
312 &gamma23_k1,
313 &sigma23_k1,
314 a2, rs_A,
315 a3, rs_A );
316
317 if ( !is_ident34_k1 )
319 &gamma34_k1,
320 &sigma34_k1,
321 a3, rs_A,
322 a4, rs_A );
323
324 if ( !is_ident12_k2 )
326 &gamma12_k2,
327 &sigma12_k2,
328 a1, rs_A,
329 a2, rs_A );
330
331 if ( !is_ident23_k2 )
333 &gamma23_k2,
334 &sigma23_k2,
335 a2, rs_A,
336 a3, rs_A );
337 }
338 else
339 {
340 // Apply to all four columns.
341
343 &gamma23_k1,
344 &sigma23_k1,
345 &gamma34_k1,
346 &sigma34_k1,
347 &gamma12_k2,
348 &sigma12_k2,
349 &gamma23_k2,
350 &sigma23_k2,
351 a1, rs_A,
352 a2, rs_A,
353 a3, rs_A,
354 a4, rs_A );
355 }
356 }
357
358 if ( n_left == 1 )
359 {
360 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
361 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362 a2 = buff_A + (g )*cs_A;
363 a3 = buff_A + (g + 1)*cs_A;
364 a4 = buff_A + (g + 2)*cs_A;
365
366 gamma23_k1 = g23_k1->real;
367 sigma23_k1 = g23_k1->imag;
368 gamma34_k1 = g34_k1->real;
369 sigma34_k1 = g34_k1->imag;
370
373
375 {
377 &gamma23_k1,
378 &sigma23_k1,
379 a2, rs_A,
380 a3, rs_A );
381 }
382 else if ( is_ident23_k1 && !is_ident34_k1 )
383 {
385 &gamma34_k1,
386 &sigma34_k1,
387 a3, rs_A,
388 a4, rs_A );
389 }
390 else
391 {
393 &gamma23_k1,
394 &sigma23_k1,
395 &gamma34_k1,
396 &sigma34_k1,
397 a2, rs_A,
398 a3, rs_A,
399 a4, rs_A );
400 }
401 }
402 }
403
404 // Shutdown stage
405
406 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
407 {
408 g = nG - 1;
409 k = j;
410
411 //n_left = 1;
412 //if ( n_left == 1 )
413 {
414 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
415 a2 = buff_A + (g )*cs_A;
416 a3 = buff_A + (g + 1)*cs_A;
417
418 gamma23_k1 = g23_k1->real;
419 sigma23_k1 = g23_k1->imag;
420
422
423 if ( !is_ident23_k1 )
425 &gamma23_k1,
426 &sigma23_k1,
427 a2, rs_A,
428 a3, rs_A );
429 ++k;
430 --g;
431 }
432
433 nG_app = k_minus_1 - j;
434 n_iter = nG_app / k_fuse;
435 n_left = nG_app % k_fuse;
436
437 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
438 {
439 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
440 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
441 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
442 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
443 a1 = buff_A + (g - 1)*cs_A;
444 a2 = buff_A + (g )*cs_A;
445 a3 = buff_A + (g + 1)*cs_A;
446 a4 = buff_A + (g + 2)*cs_A;
447
448 gamma23_k1 = g23_k1->real;
449 sigma23_k1 = g23_k1->imag;
450 gamma34_k1 = g34_k1->real;
451 sigma34_k1 = g34_k1->imag;
452 gamma12_k2 = g12_k2->real;
453 sigma12_k2 = g12_k2->imag;
454 gamma23_k2 = g23_k2->real;
455 sigma23_k2 = g23_k2->imag;
456
463
464 if ( has_ident )
465 {
466 // Apply to pairs of columns as needed.
467
468 if ( !is_ident23_k1 )
470 &gamma23_k1,
471 &sigma23_k1,
472 a2, rs_A,
473 a3, rs_A );
474
475 if ( !is_ident34_k1 )
477 &gamma34_k1,
478 &sigma34_k1,
479 a3, rs_A,
480 a4, rs_A );
481
482 if ( !is_ident12_k2 )
484 &gamma12_k2,
485 &sigma12_k2,
486 a1, rs_A,
487 a2, rs_A );
488
489 if ( !is_ident23_k2 )
491 &gamma23_k2,
492 &sigma23_k2,
493 a2, rs_A,
494 a3, rs_A );
495 }
496 else
497 {
498 // Apply to all four columns.
499
501 &gamma23_k1,
502 &sigma23_k1,
503 &gamma34_k1,
504 &sigma34_k1,
505 &gamma12_k2,
506 &sigma12_k2,
507 &gamma23_k2,
508 &sigma23_k2,
509 a1, rs_A,
510 a2, rs_A,
511 a3, rs_A,
512 a4, rs_A );
513 }
514 }
515
516 if ( n_left == 1 )
517 {
518 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
519 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
520 a2 = buff_A + (g )*cs_A;
521 a3 = buff_A + (g + 1)*cs_A;
522 a4 = buff_A + (g + 2)*cs_A;
523
524 gamma23_k1 = g23_k1->real;
525 sigma23_k1 = g23_k1->imag;
526 gamma34_k1 = g34_k1->real;
527 sigma34_k1 = g34_k1->imag;
528
531
533 {
535 &gamma23_k1,
536 &sigma23_k1,
537 a2, rs_A,
538 a3, rs_A );
539 }
540 else if ( is_ident23_k1 && !is_ident34_k1 )
541 {
543 &gamma34_k1,
544 &sigma34_k1,
545 a3, rs_A,
546 a4, rs_A );
547 }
548 else
549 {
551 &gamma23_k1,
552 &sigma23_k1,
553 &gamma34_k1,
554 &sigma34_k1,
555 a2, rs_A,
556 a3, rs_A,
557 a4, rs_A );
558 }
559 }
560 }
561
562 return FLA_SUCCESS;
563}
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_bls_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_opt_var3()

FLA_Error FLA_Apply_G_rf_opt_var3 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_opd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:565
FLA_Error FLA_Apply_G_rf_ops_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:112
FLA_Error FLA_Apply_G_rf_opz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:1471
FLA_Error FLA_Apply_G_rf_opc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var3.c:1018
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49

References FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_opz_var3()

FLA_Error FLA_Apply_G_rf_opz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
1476{
1477 double one = bl1_d1();
1478 double zero = bl1_d0();
1479 double gamma23_k1;
1480 double sigma23_k1;
1481 double gamma34_k1;
1482 double sigma34_k1;
1483 double gamma12_k2;
1484 double sigma12_k2;
1485 double gamma23_k2;
1486 double sigma23_k2;
1487 dcomplex* a1;
1488 dcomplex* a2;
1489 dcomplex* a3;
1490 dcomplex* a4;
1495 int i, j, g, k;
1496 int nG, nG_app;
1497 int n_iter;
1498 int n_left;
1499 int k_minus_1;
1500 int n_fuse;
1501 int k_fuse;
1504 int has_ident;
1505
1506 k_minus_1 = k_G - 1;
1507 nG = n_A - 1;
1508 n_fuse = 2;
1509 k_fuse = 2;
1510
1511 // Use the simple variant for nG < 2*(k - 1) or k == 1.
1512 if ( nG < 2*k_minus_1 || k_G == 1 )
1513 {
1514 FLA_Apply_G_rf_opz_var1( k_G,
1515 m_A,
1516 n_A,
1517 buff_G, rs_G, cs_G,
1518 buff_A, rs_A, cs_A );
1519 return FLA_SUCCESS;
1520 }
1521
1522
1523 // Start-up phase.
1524
1525 for ( j = -1; j < k_minus_1; j += n_fuse )
1526 {
1527 nG_app = j + 2;
1528 n_iter = nG_app / k_fuse;
1529 n_left = 1;
1530
1531 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1532 {
1533 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1534 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1535 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1536 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1537 a1 = buff_A + (g - 1)*cs_A;
1538 a2 = buff_A + (g )*cs_A;
1539 a3 = buff_A + (g + 1)*cs_A;
1540 a4 = buff_A + (g + 2)*cs_A;
1541
1542 gamma23_k1 = g23_k1->real;
1543 sigma23_k1 = g23_k1->imag;
1544 gamma34_k1 = g34_k1->real;
1545 sigma34_k1 = g34_k1->imag;
1546 gamma12_k2 = g12_k2->real;
1547 sigma12_k2 = g12_k2->imag;
1548 gamma23_k2 = g23_k2->real;
1549 sigma23_k2 = g23_k2->imag;
1550
1557
1558 if ( has_ident )
1559 {
1560 // Apply to pairs of columns as needed.
1561
1562 if ( !is_ident23_k1 )
1564 &gamma23_k1,
1565 &sigma23_k1,
1566 a2, rs_A,
1567 a3, rs_A );
1568
1569 if ( !is_ident34_k1 )
1571 &gamma34_k1,
1572 &sigma34_k1,
1573 a3, rs_A,
1574 a4, rs_A );
1575
1576 if ( !is_ident12_k2 )
1578 &gamma12_k2,
1579 &sigma12_k2,
1580 a1, rs_A,
1581 a2, rs_A );
1582
1583 if ( !is_ident23_k2 )
1585 &gamma23_k2,
1586 &sigma23_k2,
1587 a2, rs_A,
1588 a3, rs_A );
1589 }
1590 else
1591 {
1592 // Apply to all four columns.
1593
1595 &gamma23_k1,
1596 &sigma23_k1,
1597 &gamma34_k1,
1598 &sigma34_k1,
1599 &gamma12_k2,
1600 &sigma12_k2,
1601 &gamma23_k2,
1602 &sigma23_k2,
1603 a1, rs_A,
1604 a2, rs_A,
1605 a3, rs_A,
1606 a4, rs_A );
1607 }
1608 }
1609
1610 if ( n_left == 1 )
1611 {
1612 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1613 a3 = buff_A + (g + 1)*cs_A;
1614 a4 = buff_A + (g + 2)*cs_A;
1615
1616 gamma34_k1 = g34_k1->real;
1617 sigma34_k1 = g34_k1->imag;
1618
1620
1621 if ( !is_ident34_k1 )
1623 &gamma34_k1,
1624 &sigma34_k1,
1625 a3, rs_A,
1626 a4, rs_A );
1627 }
1628 }
1629
1630 // Pipeline stage
1631
1632 for ( ; j < nG - 1; j += n_fuse )
1633 {
1634 nG_app = k_G;
1635 n_iter = nG_app / k_fuse;
1636 n_left = nG_app % k_fuse;
1637
1638 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1639 {
1640 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1641 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1642 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1643 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1644 a1 = buff_A + (g - 1)*cs_A;
1645 a2 = buff_A + (g )*cs_A;
1646 a3 = buff_A + (g + 1)*cs_A;
1647 a4 = buff_A + (g + 2)*cs_A;
1648
1649 gamma23_k1 = g23_k1->real;
1650 sigma23_k1 = g23_k1->imag;
1651 gamma34_k1 = g34_k1->real;
1652 sigma34_k1 = g34_k1->imag;
1653 gamma12_k2 = g12_k2->real;
1654 sigma12_k2 = g12_k2->imag;
1655 gamma23_k2 = g23_k2->real;
1656 sigma23_k2 = g23_k2->imag;
1657
1664
1665 if ( has_ident )
1666 {
1667 // Apply to pairs of columns as needed.
1668
1669 if ( !is_ident23_k1 )
1671 &gamma23_k1,
1672 &sigma23_k1,
1673 a2, rs_A,
1674 a3, rs_A );
1675
1676 if ( !is_ident34_k1 )
1678 &gamma34_k1,
1679 &sigma34_k1,
1680 a3, rs_A,
1681 a4, rs_A );
1682
1683 if ( !is_ident12_k2 )
1685 &gamma12_k2,
1686 &sigma12_k2,
1687 a1, rs_A,
1688 a2, rs_A );
1689
1690 if ( !is_ident23_k2 )
1692 &gamma23_k2,
1693 &sigma23_k2,
1694 a2, rs_A,
1695 a3, rs_A );
1696 }
1697 else
1698 {
1699 // Apply to all four columns.
1700
1702 &gamma23_k1,
1703 &sigma23_k1,
1704 &gamma34_k1,
1705 &sigma34_k1,
1706 &gamma12_k2,
1707 &sigma12_k2,
1708 &gamma23_k2,
1709 &sigma23_k2,
1710 a1, rs_A,
1711 a2, rs_A,
1712 a3, rs_A,
1713 a4, rs_A );
1714 }
1715 }
1716
1717 if ( n_left == 1 )
1718 {
1719 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1720 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1721 a2 = buff_A + (g )*cs_A;
1722 a3 = buff_A + (g + 1)*cs_A;
1723 a4 = buff_A + (g + 2)*cs_A;
1724
1725 gamma23_k1 = g23_k1->real;
1726 sigma23_k1 = g23_k1->imag;
1727 gamma34_k1 = g34_k1->real;
1728 sigma34_k1 = g34_k1->imag;
1729
1732
1733 if ( !is_ident23_k1 && is_ident34_k1 )
1734 {
1736 &gamma23_k1,
1737 &sigma23_k1,
1738 a2, rs_A,
1739 a3, rs_A );
1740 }
1741 else if ( is_ident23_k1 && !is_ident34_k1 )
1742 {
1744 &gamma34_k1,
1745 &sigma34_k1,
1746 a3, rs_A,
1747 a4, rs_A );
1748 }
1749 else
1750 {
1752 &gamma23_k1,
1753 &sigma23_k1,
1754 &gamma34_k1,
1755 &sigma34_k1,
1756 a2, rs_A,
1757 a3, rs_A,
1758 a4, rs_A );
1759 }
1760 }
1761 }
1762
1763 // Shutdown stage
1764
1765 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1766 {
1767 g = nG - 1;
1768 k = j;
1769
1770 //n_left = 1;
1771 //if ( n_left == 1 )
1772 {
1773 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1774 a2 = buff_A + (g )*cs_A;
1775 a3 = buff_A + (g + 1)*cs_A;
1776
1777 gamma23_k1 = g23_k1->real;
1778 sigma23_k1 = g23_k1->imag;
1779
1781
1782 if ( !is_ident23_k1 )
1784 &gamma23_k1,
1785 &sigma23_k1,
1786 a2, rs_A,
1787 a3, rs_A );
1788 ++k;
1789 --g;
1790 }
1791
1792 nG_app = k_minus_1 - j;
1793 n_iter = nG_app / k_fuse;
1794 n_left = nG_app % k_fuse;
1795
1796 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1797 {
1798 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1799 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1800 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1801 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1802 a1 = buff_A + (g - 1)*cs_A;
1803 a2 = buff_A + (g )*cs_A;
1804 a3 = buff_A + (g + 1)*cs_A;
1805 a4 = buff_A + (g + 2)*cs_A;
1806
1807 gamma23_k1 = g23_k1->real;
1808 sigma23_k1 = g23_k1->imag;
1809 gamma34_k1 = g34_k1->real;
1810 sigma34_k1 = g34_k1->imag;
1811 gamma12_k2 = g12_k2->real;
1812 sigma12_k2 = g12_k2->imag;
1813 gamma23_k2 = g23_k2->real;
1814 sigma23_k2 = g23_k2->imag;
1815
1822
1823 if ( has_ident )
1824 {
1825 // Apply to pairs of columns as needed.
1826
1827 if ( !is_ident23_k1 )
1829 &gamma23_k1,
1830 &sigma23_k1,
1831 a2, rs_A,
1832 a3, rs_A );
1833
1834 if ( !is_ident34_k1 )
1836 &gamma34_k1,
1837 &sigma34_k1,
1838 a3, rs_A,
1839 a4, rs_A );
1840
1841 if ( !is_ident12_k2 )
1843 &gamma12_k2,
1844 &sigma12_k2,
1845 a1, rs_A,
1846 a2, rs_A );
1847
1848 if ( !is_ident23_k2 )
1850 &gamma23_k2,
1851 &sigma23_k2,
1852 a2, rs_A,
1853 a3, rs_A );
1854 }
1855 else
1856 {
1857 // Apply to all four columns.
1858
1860 &gamma23_k1,
1861 &sigma23_k1,
1862 &gamma34_k1,
1863 &sigma34_k1,
1864 &gamma12_k2,
1865 &sigma12_k2,
1866 &gamma23_k2,
1867 &sigma23_k2,
1868 a1, rs_A,
1869 a2, rs_A,
1870 a3, rs_A,
1871 a4, rs_A );
1872 }
1873 }
1874
1875 if ( n_left == 1 )
1876 {
1877 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1878 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1879 a2 = buff_A + (g )*cs_A;
1880 a3 = buff_A + (g + 1)*cs_A;
1881 a4 = buff_A + (g + 2)*cs_A;
1882
1883 gamma23_k1 = g23_k1->real;
1884 sigma23_k1 = g23_k1->imag;
1885 gamma34_k1 = g34_k1->real;
1886 sigma34_k1 = g34_k1->imag;
1887
1890
1891 if ( !is_ident23_k1 && is_ident34_k1 )
1892 {
1894 &gamma23_k1,
1895 &sigma23_k1,
1896 a2, rs_A,
1897 a3, rs_A );
1898 }
1899 else if ( is_ident23_k1 && !is_ident34_k1 )
1900 {
1902 &gamma34_k1,
1903 &sigma34_k1,
1904 a3, rs_A,
1905 a4, rs_A );
1906 }
1907 else
1908 {
1910 &gamma23_k1,
1911 &sigma23_k1,
1912 &gamma34_k1,
1913 &sigma34_k1,
1914 a2, rs_A,
1915 a3, rs_A,
1916 a4, rs_A );
1917 }
1918 }
1919 }
1920
1921 return FLA_SUCCESS;
1922}
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_opt_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_blz_var3() and FLA_Apply_G_rf_opt_var3().