libflame revision_anchor
Functions
FLA_Apply_G_rf_asm_var3.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var3 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var3()

FLA_Error FLA_Apply_G_rf_asc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
1024{
1025 float one = bl1_s1();
1026 float zero = bl1_s0();
1027 float gamma23_k1;
1028 float sigma23_k1;
1029 float gamma34_k1;
1030 float sigma34_k1;
1031 float gamma12_k2;
1032 float sigma12_k2;
1033 float gamma23_k2;
1034 float sigma23_k2;
1035 scomplex* a1;
1036 scomplex* a2;
1037 scomplex* a3;
1038 scomplex* a4;
1043 int i, j, g, k;
1044 int nG, nG_app;
1045 int n_iter;
1046 int n_left;
1047 int k_minus_1;
1048 int n_fuse;
1049 int k_fuse;
1052 int has_ident;
1053
1054 k_minus_1 = k_G - 1;
1055 nG = n_A - 1;
1056 n_fuse = 2;
1057 k_fuse = 2;
1058
1059 // Use the simple variant for nG < 2*(k - 1) or k == 1.
1060 if ( nG < 2*k_minus_1 || k_G == 1 )
1061 {
1062 FLA_Apply_G_rf_asc_var1( k_G,
1063 m_A,
1064 n_A,
1065 buff_G, rs_G, cs_G,
1066 buff_A, rs_A, cs_A );
1067 return FLA_SUCCESS;
1068 }
1069
1070
1071 // Start-up phase.
1072
1073 for ( j = -1; j < k_minus_1; j += n_fuse )
1074 {
1075 nG_app = j + 2;
1076 n_iter = nG_app / k_fuse;
1077 //n_iter = nG_app % k_fuse;
1078 n_left = 1;
1079
1080 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1081 {
1082 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1083 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1084 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1085 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1086 a1 = buff_A + (g - 1)*cs_A;
1087 a2 = buff_A + (g )*cs_A;
1088 a3 = buff_A + (g + 1)*cs_A;
1089 a4 = buff_A + (g + 2)*cs_A;
1090
1091 gamma23_k1 = g23_k1->real;
1092 sigma23_k1 = g23_k1->imag;
1093 gamma34_k1 = g34_k1->real;
1094 sigma34_k1 = g34_k1->imag;
1095 gamma12_k2 = g12_k2->real;
1096 sigma12_k2 = g12_k2->imag;
1097 gamma23_k2 = g23_k2->real;
1098 sigma23_k2 = g23_k2->imag;
1099
1106
1107 if ( has_ident )
1108 {
1109 // Apply to pairs of columns as needed.
1110
1111 if ( !is_ident23_k1 )
1113 &gamma23_k1,
1114 &sigma23_k1,
1115 a2, 1,
1116 a3, 1 );
1117
1118 if ( !is_ident34_k1 )
1120 &gamma34_k1,
1121 &sigma34_k1,
1122 a3, 1,
1123 a4, 1 );
1124
1125 if ( !is_ident12_k2 )
1127 &gamma12_k2,
1128 &sigma12_k2,
1129 a1, 1,
1130 a2, 1 );
1131
1132 if ( !is_ident23_k2 )
1134 &gamma23_k2,
1135 &sigma23_k2,
1136 a2, 1,
1137 a3, 1 );
1138 }
1139 else
1140 {
1141 // Apply to all four columns.
1142
1144 &gamma23_k1,
1145 &sigma23_k1,
1146 &gamma34_k1,
1147 &sigma34_k1,
1148 &gamma12_k2,
1149 &sigma12_k2,
1150 &gamma23_k2,
1151 &sigma23_k2,
1152 a1, 1,
1153 a2, 1,
1154 a3, 1,
1155 a4, 1 );
1156 }
1157 }
1158
1159 if ( n_left == 1 )
1160 {
1161 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1162 a3 = buff_A + (g + 1)*cs_A;
1163 a4 = buff_A + (g + 2)*cs_A;
1164
1165 gamma34_k1 = g34_k1->real;
1166 sigma34_k1 = g34_k1->imag;
1167
1169
1170 if ( !is_ident34_k1 )
1172 &gamma34_k1,
1173 &sigma34_k1,
1174 a3, 1,
1175 a4, 1 );
1176 }
1177 }
1178
1179 // Pipeline stage
1180
1181 for ( ; j < nG - 1; j += n_fuse )
1182 {
1183 nG_app = k_G;
1184 n_iter = nG_app / k_fuse;
1185 n_left = nG_app % k_fuse;
1186
1187 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1188 {
1189 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1190 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1191 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1192 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1193 a1 = buff_A + (g - 1)*cs_A;
1194 a2 = buff_A + (g )*cs_A;
1195 a3 = buff_A + (g + 1)*cs_A;
1196 a4 = buff_A + (g + 2)*cs_A;
1197
1198 gamma23_k1 = g23_k1->real;
1199 sigma23_k1 = g23_k1->imag;
1200 gamma34_k1 = g34_k1->real;
1201 sigma34_k1 = g34_k1->imag;
1202 gamma12_k2 = g12_k2->real;
1203 sigma12_k2 = g12_k2->imag;
1204 gamma23_k2 = g23_k2->real;
1205 sigma23_k2 = g23_k2->imag;
1206
1213
1214 if ( has_ident )
1215 {
1216 // Apply to pairs of columns as needed.
1217
1218 if ( !is_ident23_k1 )
1220 &gamma23_k1,
1221 &sigma23_k1,
1222 a2, 1,
1223 a3, 1 );
1224
1225 if ( !is_ident34_k1 )
1227 &gamma34_k1,
1228 &sigma34_k1,
1229 a3, 1,
1230 a4, 1 );
1231
1232 if ( !is_ident12_k2 )
1234 &gamma12_k2,
1235 &sigma12_k2,
1236 a1, 1,
1237 a2, 1 );
1238
1239 if ( !is_ident23_k2 )
1241 &gamma23_k2,
1242 &sigma23_k2,
1243 a2, 1,
1244 a3, 1 );
1245 }
1246 else
1247 {
1248 // Apply to all four columns.
1249
1251 &gamma23_k1,
1252 &sigma23_k1,
1253 &gamma34_k1,
1254 &sigma34_k1,
1255 &gamma12_k2,
1256 &sigma12_k2,
1257 &gamma23_k2,
1258 &sigma23_k2,
1259 a1, 1,
1260 a2, 1,
1261 a3, 1,
1262 a4, 1 );
1263 }
1264 }
1265
1266 if ( n_left == 1 )
1267 {
1268 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1269 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1270 a2 = buff_A + (g )*cs_A;
1271 a3 = buff_A + (g + 1)*cs_A;
1272 a4 = buff_A + (g + 2)*cs_A;
1273
1274 gamma23_k1 = g23_k1->real;
1275 sigma23_k1 = g23_k1->imag;
1276 gamma34_k1 = g34_k1->real;
1277 sigma34_k1 = g34_k1->imag;
1278
1281
1282 if ( !is_ident23_k1 && is_ident34_k1 )
1283 {
1285 &gamma23_k1,
1286 &sigma23_k1,
1287 a2, 1,
1288 a3, 1 );
1289 }
1290 else if ( is_ident23_k1 && !is_ident34_k1 )
1291 {
1293 &gamma34_k1,
1294 &sigma34_k1,
1295 a3, 1,
1296 a4, 1 );
1297 }
1298 else
1299 {
1301 &gamma23_k1,
1302 &sigma23_k1,
1303 &gamma34_k1,
1304 &sigma34_k1,
1305 a2, 1,
1306 a3, 1,
1307 a4, 1 );
1308 }
1309 }
1310 }
1311
1312 // Shutdown stage
1313
1314 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1315 {
1316 g = nG - 1;
1317 k = j;
1318
1319 //n_left = 1;
1320 //if ( n_left == 1 )
1321 {
1322 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1323 a2 = buff_A + (g )*cs_A;
1324 a3 = buff_A + (g + 1)*cs_A;
1325
1326 gamma23_k1 = g23_k1->real;
1327 sigma23_k1 = g23_k1->imag;
1328
1330
1331 if ( !is_ident23_k1 )
1333 &gamma23_k1,
1334 &sigma23_k1,
1335 a2, 1,
1336 a3, 1 );
1337 ++k;
1338 --g;
1339 }
1340
1341 nG_app = k_minus_1 - j;
1342 n_iter = nG_app / k_fuse;
1343 n_left = nG_app % k_fuse;
1344
1345 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1346 {
1347 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1348 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1349 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1350 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1351 a1 = buff_A + (g - 1)*cs_A;
1352 a2 = buff_A + (g )*cs_A;
1353 a3 = buff_A + (g + 1)*cs_A;
1354 a4 = buff_A + (g + 2)*cs_A;
1355
1356 gamma23_k1 = g23_k1->real;
1357 sigma23_k1 = g23_k1->imag;
1358 gamma34_k1 = g34_k1->real;
1359 sigma34_k1 = g34_k1->imag;
1360 gamma12_k2 = g12_k2->real;
1361 sigma12_k2 = g12_k2->imag;
1362 gamma23_k2 = g23_k2->real;
1363 sigma23_k2 = g23_k2->imag;
1364
1371
1372 if ( has_ident )
1373 {
1374 // Apply to pairs of columns as needed.
1375
1376 if ( !is_ident23_k1 )
1378 &gamma23_k1,
1379 &sigma23_k1,
1380 a2, 1,
1381 a3, 1 );
1382
1383 if ( !is_ident34_k1 )
1385 &gamma34_k1,
1386 &sigma34_k1,
1387 a3, 1,
1388 a4, 1 );
1389
1390 if ( !is_ident12_k2 )
1392 &gamma12_k2,
1393 &sigma12_k2,
1394 a1, 1,
1395 a2, 1 );
1396
1397 if ( !is_ident23_k2 )
1399 &gamma23_k2,
1400 &sigma23_k2,
1401 a2, 1,
1402 a3, 1 );
1403 }
1404 else
1405 {
1406 // Apply to all four columns.
1407
1409 &gamma23_k1,
1410 &sigma23_k1,
1411 &gamma34_k1,
1412 &sigma34_k1,
1413 &gamma12_k2,
1414 &sigma12_k2,
1415 &gamma23_k2,
1416 &sigma23_k2,
1417 a1, 1,
1418 a2, 1,
1419 a3, 1,
1420 a4, 1 );
1421 }
1422 }
1423
1424 if ( n_left == 1 )
1425 {
1426 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1427 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1428 a2 = buff_A + (g )*cs_A;
1429 a3 = buff_A + (g + 1)*cs_A;
1430 a4 = buff_A + (g + 2)*cs_A;
1431
1432 gamma23_k1 = g23_k1->real;
1433 sigma23_k1 = g23_k1->imag;
1434 gamma34_k1 = g34_k1->real;
1435 sigma34_k1 = g34_k1->imag;
1436
1439
1440 if ( !is_ident23_k1 && is_ident34_k1 )
1441 {
1443 &gamma23_k1,
1444 &sigma23_k1,
1445 a2, 1,
1446 a3, 1 );
1447 }
1448 else if ( is_ident23_k1 && !is_ident34_k1 )
1449 {
1451 &gamma34_k1,
1452 &sigma34_k1,
1453 a3, 1,
1454 a4, 1 );
1455 }
1456 else
1457 {
1459 &gamma23_k1,
1460 &sigma23_k1,
1461 &gamma34_k1,
1462 &sigma34_k1,
1463 a2, 1,
1464 a3, 1,
1465 a4, 1 );
1466 }
1467 }
1468 }
1469
1470 return FLA_SUCCESS;
1471}
FLA_Error FLA_Apply_G_rf_asc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:215
int n_left
Definition bl1_axmyv2.c:149
int i
Definition bl1_axmyv2.c:145
float bl1_s0(void)
Definition bl1_constants.c:111
float bl1_s1(void)
Definition bl1_constants.c:47
scomplex
Definition blis_type_defs.h:133

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_asd_var3()

FLA_Error FLA_Apply_G_rf_asd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
571{
572 double one = bl1_d1();
573 double zero = bl1_d0();
574 double gamma23_k1;
575 double sigma23_k1;
576 double gamma34_k1;
577 double sigma34_k1;
578 double gamma12_k2;
579 double sigma12_k2;
580 double gamma23_k2;
581 double sigma23_k2;
582 double* a1;
583 double* a2;
584 double* a3;
585 double* a4;
590 int i, j, g, k;
591 int nG, nG_app;
592 int n_iter;
593 int n_left;
594 int k_minus_1;
595 int n_fuse;
596 int k_fuse;
599 int has_ident;
600
601 k_minus_1 = k_G - 1;
602 nG = n_A - 1;
603 n_fuse = 2;
604 k_fuse = 2;
605
606 // Use the simple variant for nG < 2*(k - 1) or k == 1.
607 if ( nG < 2*k_minus_1 || k_G == 1 )
608 {
609 FLA_Apply_G_rf_asd_var1( k_G,
610 m_A,
611 n_A,
612 buff_G, rs_G, cs_G,
613 buff_A, rs_A, cs_A );
614 return FLA_SUCCESS;
615 }
616
617
618 // Start-up phase.
619
620 for ( j = -1; j < k_minus_1; j += n_fuse )
621 {
622 nG_app = j + 2;
623 n_iter = nG_app / k_fuse;
624 n_left = 1;
625
626 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
627 {
628 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
629 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
630 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
631 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
632 a1 = buff_A + (g - 1)*cs_A;
633 a2 = buff_A + (g )*cs_A;
634 a3 = buff_A + (g + 1)*cs_A;
635 a4 = buff_A + (g + 2)*cs_A;
636
637 gamma23_k1 = g23_k1->real;
638 sigma23_k1 = g23_k1->imag;
639 gamma34_k1 = g34_k1->real;
640 sigma34_k1 = g34_k1->imag;
641 gamma12_k2 = g12_k2->real;
642 sigma12_k2 = g12_k2->imag;
643 gamma23_k2 = g23_k2->real;
644 sigma23_k2 = g23_k2->imag;
645
652
653 if ( has_ident )
654 {
655 // Apply to pairs of columns as needed.
656
657 if ( !is_ident23_k1 )
659 &gamma23_k1,
660 &sigma23_k1,
661 a2, 1,
662 a3, 1 );
663
664 if ( !is_ident34_k1 )
666 &gamma34_k1,
667 &sigma34_k1,
668 a3, 1,
669 a4, 1 );
670
671 if ( !is_ident12_k2 )
673 &gamma12_k2,
674 &sigma12_k2,
675 a1, 1,
676 a2, 1 );
677
678 if ( !is_ident23_k2 )
680 &gamma23_k2,
681 &sigma23_k2,
682 a2, 1,
683 a3, 1 );
684 }
685 else
686 {
687 // Apply to all four columns.
688
690 &gamma23_k1,
691 &sigma23_k1,
692 &gamma34_k1,
693 &sigma34_k1,
694 &gamma12_k2,
695 &sigma12_k2,
696 &gamma23_k2,
697 &sigma23_k2,
698 a1, 1,
699 a2, 1,
700 a3, 1,
701 a4, 1 );
702 }
703 }
704
705 if ( n_left == 1 )
706 {
707 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
708 a3 = buff_A + (g + 1)*cs_A;
709 a4 = buff_A + (g + 2)*cs_A;
710
711 gamma34_k1 = g34_k1->real;
712 sigma34_k1 = g34_k1->imag;
713
715
716 if ( !is_ident34_k1 )
718 &gamma34_k1,
719 &sigma34_k1,
720 a3, 1,
721 a4, 1 );
722 }
723 }
724
725 // Pipeline stage
726
727 for ( ; j < nG - 1; j += n_fuse )
728 {
729 nG_app = k_G;
730 n_iter = nG_app / k_fuse;
731 n_left = nG_app % k_fuse;
732
733 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
734 {
735 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
736 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
737 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
738 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
739 a1 = buff_A + (g - 1)*cs_A;
740 a2 = buff_A + (g )*cs_A;
741 a3 = buff_A + (g + 1)*cs_A;
742 a4 = buff_A + (g + 2)*cs_A;
743
744 gamma23_k1 = g23_k1->real;
745 sigma23_k1 = g23_k1->imag;
746 gamma34_k1 = g34_k1->real;
747 sigma34_k1 = g34_k1->imag;
748 gamma12_k2 = g12_k2->real;
749 sigma12_k2 = g12_k2->imag;
750 gamma23_k2 = g23_k2->real;
751 sigma23_k2 = g23_k2->imag;
752
759
760 if ( has_ident )
761 {
762 // Apply to pairs of columns as needed.
763
764 if ( !is_ident23_k1 )
766 &gamma23_k1,
767 &sigma23_k1,
768 a2, 1,
769 a3, 1 );
770
771 if ( !is_ident34_k1 )
773 &gamma34_k1,
774 &sigma34_k1,
775 a3, 1,
776 a4, 1 );
777
778 if ( !is_ident12_k2 )
780 &gamma12_k2,
781 &sigma12_k2,
782 a1, 1,
783 a2, 1 );
784
785 if ( !is_ident23_k2 )
787 &gamma23_k2,
788 &sigma23_k2,
789 a2, 1,
790 a3, 1 );
791 }
792 else
793 {
794 // Apply to all four columns.
795
797 &gamma23_k1,
798 &sigma23_k1,
799 &gamma34_k1,
800 &sigma34_k1,
801 &gamma12_k2,
802 &sigma12_k2,
803 &gamma23_k2,
804 &sigma23_k2,
805 a1, 1,
806 a2, 1,
807 a3, 1,
808 a4, 1 );
809 }
810 }
811
812 if ( n_left == 1 )
813 {
814 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
815 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
816 a2 = buff_A + (g )*cs_A;
817 a3 = buff_A + (g + 1)*cs_A;
818 a4 = buff_A + (g + 2)*cs_A;
819
820 gamma23_k1 = g23_k1->real;
821 sigma23_k1 = g23_k1->imag;
822 gamma34_k1 = g34_k1->real;
823 sigma34_k1 = g34_k1->imag;
824
827
828 if ( !is_ident23_k1 && is_ident34_k1 )
829 {
831 &gamma23_k1,
832 &sigma23_k1,
833 a2, 1,
834 a3, 1 );
835 }
836 else if ( is_ident23_k1 && !is_ident34_k1 )
837 {
839 &gamma34_k1,
840 &sigma34_k1,
841 a3, 1,
842 a4, 1 );
843 }
844 else
845 {
847 &gamma23_k1,
848 &sigma23_k1,
849 &gamma34_k1,
850 &sigma34_k1,
851 a2, 1,
852 a3, 1,
853 a4, 1 );
854 }
855 }
856 }
857
858 // Shutdown stage
859
860 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
861 {
862 g = nG - 1;
863 k = j;
864
865 //n_left = 1;
866 //if ( n_left == 1 )
867 {
868 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
869 a2 = buff_A + (g )*cs_A;
870 a3 = buff_A + (g + 1)*cs_A;
871
872 gamma23_k1 = g23_k1->real;
873 sigma23_k1 = g23_k1->imag;
874
876
877 if ( !is_ident23_k1 )
879 &gamma23_k1,
880 &sigma23_k1,
881 a2, 1,
882 a3, 1 );
883 ++k;
884 --g;
885 }
886
887 nG_app = k_minus_1 - j;
888 n_iter = nG_app / k_fuse;
889 n_left = nG_app % k_fuse;
890
891 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
892 {
893 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
894 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
895 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
896 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
897 a1 = buff_A + (g - 1)*cs_A;
898 a2 = buff_A + (g )*cs_A;
899 a3 = buff_A + (g + 1)*cs_A;
900 a4 = buff_A + (g + 2)*cs_A;
901
902 gamma23_k1 = g23_k1->real;
903 sigma23_k1 = g23_k1->imag;
904 gamma34_k1 = g34_k1->real;
905 sigma34_k1 = g34_k1->imag;
906 gamma12_k2 = g12_k2->real;
907 sigma12_k2 = g12_k2->imag;
908 gamma23_k2 = g23_k2->real;
909 sigma23_k2 = g23_k2->imag;
910
917
918 if ( has_ident )
919 {
920 // Apply to pairs of columns as needed.
921
922 if ( !is_ident23_k1 )
924 &gamma23_k1,
925 &sigma23_k1,
926 a2, 1,
927 a3, 1 );
928
929 if ( !is_ident34_k1 )
931 &gamma34_k1,
932 &sigma34_k1,
933 a3, 1,
934 a4, 1 );
935
936 if ( !is_ident12_k2 )
938 &gamma12_k2,
939 &sigma12_k2,
940 a1, 1,
941 a2, 1 );
942
943 if ( !is_ident23_k2 )
945 &gamma23_k2,
946 &sigma23_k2,
947 a2, 1,
948 a3, 1 );
949 }
950 else
951 {
952 // Apply to all four columns.
953
955 &gamma23_k1,
956 &sigma23_k1,
957 &gamma34_k1,
958 &sigma34_k1,
959 &gamma12_k2,
960 &sigma12_k2,
961 &gamma23_k2,
962 &sigma23_k2,
963 a1, 1,
964 a2, 1,
965 a3, 1,
966 a4, 1 );
967 }
968 }
969
970 if ( n_left == 1 )
971 {
972 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
973 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
974 a2 = buff_A + (g )*cs_A;
975 a3 = buff_A + (g + 1)*cs_A;
976 a4 = buff_A + (g + 2)*cs_A;
977
978 gamma23_k1 = g23_k1->real;
979 sigma23_k1 = g23_k1->imag;
980 gamma34_k1 = g34_k1->real;
981 sigma34_k1 = g34_k1->imag;
982
985
986 if ( !is_ident23_k1 && is_ident34_k1 )
987 {
989 &gamma23_k1,
990 &sigma23_k1,
991 a2, 1,
992 a3, 1 );
993 }
994 else if ( is_ident23_k1 && !is_ident34_k1 )
995 {
997 &gamma34_k1,
998 &sigma34_k1,
999 a3, 1,
1000 a4, 1 );
1001 }
1002 else
1003 {
1005 &gamma23_k1,
1006 &sigma23_k1,
1007 &gamma34_k1,
1008 &sigma34_k1,
1009 a2, 1,
1010 a3, 1,
1011 a4, 1 );
1012 }
1013 }
1014 }
1015
1016 return FLA_SUCCESS;
1017}
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:164
double bl1_d0(void)
Definition bl1_constants.c:118
double bl1_d1(void)
Definition bl1_constants.c:54
dcomplex
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_asm_var3()

FLA_Error FLA_Apply_G_rf_asm_var3 ( FLA_Obj  G,
FLA_Obj  A 
)
32{
33 FLA_Datatype datatype;
34 int k_G, m_A, n_A;
35 int rs_G, cs_G;
36 int rs_A, cs_A;
37
38 datatype = FLA_Obj_datatype( A );
39
40 k_G = FLA_Obj_width( G );
41 m_A = FLA_Obj_length( A );
42 n_A = FLA_Obj_width( A );
43
46
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
55 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56
58 m_A,
59 n_A,
61 buff_A, rs_A, cs_A );
62
63 break;
64 }
65
66 case FLA_DOUBLE:
67 {
69 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70
72 m_A,
73 n_A,
75 buff_A, rs_A, cs_A );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
84
86 m_A,
87 n_A,
89 buff_A, rs_A, cs_A );
90
91 break;
92 }
93
94 case FLA_DOUBLE_COMPLEX:
95 {
98
100 m_A,
101 n_A,
102 buff_G, rs_G, cs_G,
103 buff_A, rs_A, cs_A );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Apply_G_rf_ass_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:113
FLA_Error FLA_Apply_G_rf_asz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:1473
FLA_Error FLA_Apply_G_rf_asd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:566
FLA_Error FLA_Apply_G_rf_asc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3.c:1019
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49

References FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_asz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_ass_var3()

FLA_Error FLA_Apply_G_rf_ass_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
118{
119 float one = bl1_s1();
120 float zero = bl1_s0();
121 float gamma23_k1;
122 float sigma23_k1;
123 float gamma34_k1;
124 float sigma34_k1;
125 float gamma12_k2;
126 float sigma12_k2;
127 float gamma23_k2;
128 float sigma23_k2;
129 float* a1;
130 float* a2;
131 float* a3;
132 float* a4;
137 int i, j, g, k;
138 int nG, nG_app;
139 int n_iter;
140 int n_left;
141 int k_minus_1;
142 int n_fuse;
143 int k_fuse;
146 int has_ident;
147
148 k_minus_1 = k_G - 1;
149 nG = n_A - 1;
150 n_fuse = 2;
151 k_fuse = 2;
152
153 // Use the simple variant for nG < 2*(k - 1) or k == 1.
154 if ( nG < 2*k_minus_1 || k_G == 1 )
155 {
156 FLA_Apply_G_rf_ass_var1( k_G,
157 m_A,
158 n_A,
159 buff_G, rs_G, cs_G,
160 buff_A, rs_A, cs_A );
161 return FLA_SUCCESS;
162 }
163
164
165 // Start-up phase.
166
167 for ( j = -1; j < k_minus_1; j += n_fuse )
168 {
169 nG_app = j + 2;
170 n_iter = nG_app / k_fuse;
171 n_left = 1;
172
173 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
174 {
175 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
176 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
177 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
178 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
179 a1 = buff_A + (g - 1)*cs_A;
180 a2 = buff_A + (g )*cs_A;
181 a3 = buff_A + (g + 1)*cs_A;
182 a4 = buff_A + (g + 2)*cs_A;
183
184 gamma23_k1 = g23_k1->real;
185 sigma23_k1 = g23_k1->imag;
186 gamma34_k1 = g34_k1->real;
187 sigma34_k1 = g34_k1->imag;
188 gamma12_k2 = g12_k2->real;
189 sigma12_k2 = g12_k2->imag;
190 gamma23_k2 = g23_k2->real;
191 sigma23_k2 = g23_k2->imag;
192
199
200 if ( has_ident )
201 {
202 // Apply to pairs of columns as needed.
203
204 if ( !is_ident23_k1 )
206 &gamma23_k1,
207 &sigma23_k1,
208 a2, 1,
209 a3, 1 );
210
211 if ( !is_ident34_k1 )
213 &gamma34_k1,
214 &sigma34_k1,
215 a3, 1,
216 a4, 1 );
217
218 if ( !is_ident12_k2 )
220 &gamma12_k2,
221 &sigma12_k2,
222 a1, 1,
223 a2, 1 );
224
225 if ( !is_ident23_k2 )
227 &gamma23_k2,
228 &sigma23_k2,
229 a2, 1,
230 a3, 1 );
231 }
232 else
233 {
234 // Apply to all four columns.
235
237 &gamma23_k1,
238 &sigma23_k1,
239 &gamma34_k1,
240 &sigma34_k1,
241 &gamma12_k2,
242 &sigma12_k2,
243 &gamma23_k2,
244 &sigma23_k2,
245 a1, 1,
246 a2, 1,
247 a3, 1,
248 a4, 1 );
249 }
250 }
251
252 if ( n_left == 1 )
253 {
254 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
255 a3 = buff_A + (g + 1)*cs_A;
256 a4 = buff_A + (g + 2)*cs_A;
257
258 gamma34_k1 = g34_k1->real;
259 sigma34_k1 = g34_k1->imag;
260
262
263 if ( !is_ident34_k1 )
265 &gamma34_k1,
266 &sigma34_k1,
267 a3, 1,
268 a4, 1 );
269 }
270 }
271
272 // Pipeline stage
273
274 for ( ; j < nG - 1; j += n_fuse )
275 {
276 nG_app = k_G;
277 n_iter = nG_app / k_fuse;
278 n_left = nG_app % k_fuse;
279
280 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
281 {
282 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
283 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
284 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
285 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
286 a1 = buff_A + (g - 1)*cs_A;
287 a2 = buff_A + (g )*cs_A;
288 a3 = buff_A + (g + 1)*cs_A;
289 a4 = buff_A + (g + 2)*cs_A;
290
291 gamma23_k1 = g23_k1->real;
292 sigma23_k1 = g23_k1->imag;
293 gamma34_k1 = g34_k1->real;
294 sigma34_k1 = g34_k1->imag;
295 gamma12_k2 = g12_k2->real;
296 sigma12_k2 = g12_k2->imag;
297 gamma23_k2 = g23_k2->real;
298 sigma23_k2 = g23_k2->imag;
299
306
307 if ( has_ident )
308 {
309 // Apply to pairs of columns as needed.
310
311 if ( !is_ident23_k1 )
313 &gamma23_k1,
314 &sigma23_k1,
315 a2, 1,
316 a3, 1 );
317
318 if ( !is_ident34_k1 )
320 &gamma34_k1,
321 &sigma34_k1,
322 a3, 1,
323 a4, 1 );
324
325 if ( !is_ident12_k2 )
327 &gamma12_k2,
328 &sigma12_k2,
329 a1, 1,
330 a2, 1 );
331
332 if ( !is_ident23_k2 )
334 &gamma23_k2,
335 &sigma23_k2,
336 a2, 1,
337 a3, 1 );
338 }
339 else
340 {
341 // Apply to all four columns.
342
344 &gamma23_k1,
345 &sigma23_k1,
346 &gamma34_k1,
347 &sigma34_k1,
348 &gamma12_k2,
349 &sigma12_k2,
350 &gamma23_k2,
351 &sigma23_k2,
352 a1, 1,
353 a2, 1,
354 a3, 1,
355 a4, 1 );
356 }
357 }
358
359 if ( n_left == 1 )
360 {
361 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
362 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
363 a2 = buff_A + (g )*cs_A;
364 a3 = buff_A + (g + 1)*cs_A;
365 a4 = buff_A + (g + 2)*cs_A;
366
367 gamma23_k1 = g23_k1->real;
368 sigma23_k1 = g23_k1->imag;
369 gamma34_k1 = g34_k1->real;
370 sigma34_k1 = g34_k1->imag;
371
374
375 if ( !is_ident23_k1 && is_ident34_k1 )
376 {
378 &gamma23_k1,
379 &sigma23_k1,
380 a2, 1,
381 a3, 1 );
382 }
383 else if ( is_ident23_k1 && !is_ident34_k1 )
384 {
386 &gamma34_k1,
387 &sigma34_k1,
388 a3, 1,
389 a4, 1 );
390 }
391 else
392 {
394 &gamma23_k1,
395 &sigma23_k1,
396 &gamma34_k1,
397 &sigma34_k1,
398 a2, 1,
399 a3, 1,
400 a4, 1 );
401 }
402 }
403 }
404
405 // Shutdown stage
406
407 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
408 {
409 g = nG - 1;
410 k = j;
411
412 //n_left = 1;
413 //if ( n_left == 1 )
414 {
415 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
416 a2 = buff_A + (g )*cs_A;
417 a3 = buff_A + (g + 1)*cs_A;
418
419 gamma23_k1 = g23_k1->real;
420 sigma23_k1 = g23_k1->imag;
421
423
424 if ( !is_ident23_k1 )
426 &gamma23_k1,
427 &sigma23_k1,
428 a2, 1,
429 a3, 1 );
430 ++k;
431 --g;
432 }
433
434 nG_app = k_minus_1 - j;
435 n_iter = nG_app / k_fuse;
436 n_left = nG_app % k_fuse;
437
438 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
439 {
440 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
441 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
442 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
443 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
444 a1 = buff_A + (g - 1)*cs_A;
445 a2 = buff_A + (g )*cs_A;
446 a3 = buff_A + (g + 1)*cs_A;
447 a4 = buff_A + (g + 2)*cs_A;
448
449 gamma23_k1 = g23_k1->real;
450 sigma23_k1 = g23_k1->imag;
451 gamma34_k1 = g34_k1->real;
452 sigma34_k1 = g34_k1->imag;
453 gamma12_k2 = g12_k2->real;
454 sigma12_k2 = g12_k2->imag;
455 gamma23_k2 = g23_k2->real;
456 sigma23_k2 = g23_k2->imag;
457
464
465 if ( has_ident )
466 {
467 // Apply to pairs of columns as needed.
468
469 if ( !is_ident23_k1 )
471 &gamma23_k1,
472 &sigma23_k1,
473 a2, 1,
474 a3, 1 );
475
476 if ( !is_ident34_k1 )
478 &gamma34_k1,
479 &sigma34_k1,
480 a3, 1,
481 a4, 1 );
482
483 if ( !is_ident12_k2 )
485 &gamma12_k2,
486 &sigma12_k2,
487 a1, 1,
488 a2, 1 );
489
490 if ( !is_ident23_k2 )
492 &gamma23_k2,
493 &sigma23_k2,
494 a2, 1,
495 a3, 1 );
496 }
497 else
498 {
499 // Apply to all four columns.
500
502 &gamma23_k1,
503 &sigma23_k1,
504 &gamma34_k1,
505 &sigma34_k1,
506 &gamma12_k2,
507 &sigma12_k2,
508 &gamma23_k2,
509 &sigma23_k2,
510 a1, 1,
511 a2, 1,
512 a3, 1,
513 a4, 1 );
514 }
515 }
516
517 if ( n_left == 1 )
518 {
519 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
520 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
521 a2 = buff_A + (g )*cs_A;
522 a3 = buff_A + (g + 1)*cs_A;
523 a4 = buff_A + (g + 2)*cs_A;
524
525 gamma23_k1 = g23_k1->real;
526 sigma23_k1 = g23_k1->imag;
527 gamma34_k1 = g34_k1->real;
528 sigma34_k1 = g34_k1->imag;
529
532
533 if ( !is_ident23_k1 && is_ident34_k1 )
534 {
536 &gamma23_k1,
537 &sigma23_k1,
538 a2, 1,
539 a3, 1 );
540 }
541 else if ( is_ident23_k1 && !is_ident34_k1 )
542 {
544 &gamma34_k1,
545 &sigma34_k1,
546 a3, 1,
547 a4, 1 );
548 }
549 else
550 {
552 &gamma23_k1,
553 &sigma23_k1,
554 &gamma34_k1,
555 &sigma34_k1,
556 a2, 1,
557 a3, 1,
558 a4, 1 );
559 }
560 }
561 }
562
563 return FLA_SUCCESS;
564}
FLA_Error FLA_Apply_G_rf_ass_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_asz_var3()

FLA_Error FLA_Apply_G_rf_asz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
1478{
1479 double one = bl1_d1();
1480 double zero = bl1_d0();
1481 double gamma23_k1;
1482 double sigma23_k1;
1483 double gamma34_k1;
1484 double sigma34_k1;
1485 double gamma12_k2;
1486 double sigma12_k2;
1487 double gamma23_k2;
1488 double sigma23_k2;
1489 dcomplex* a1;
1490 dcomplex* a2;
1491 dcomplex* a3;
1492 dcomplex* a4;
1497 int i, j, g, k;
1498 int nG, nG_app;
1499 int n_iter;
1500 int n_left;
1501 int k_minus_1;
1502 int n_fuse;
1503 int k_fuse;
1506 int has_ident;
1507
1508 k_minus_1 = k_G - 1;
1509 nG = n_A - 1;
1510 n_fuse = 2;
1511 k_fuse = 2;
1512
1513 // Use the simple variant for nG < 2*(k - 1) or k == 1.
1514 if ( nG < 2*k_minus_1 || k_G == 1 )
1515 {
1516 FLA_Apply_G_rf_asz_var1( k_G,
1517 m_A,
1518 n_A,
1519 buff_G, rs_G, cs_G,
1520 buff_A, rs_A, cs_A );
1521 return FLA_SUCCESS;
1522 }
1523
1524
1525 // Start-up phase.
1526
1527 for ( j = -1; j < k_minus_1; j += n_fuse )
1528 {
1529 nG_app = j + 2;
1530 n_iter = nG_app / k_fuse;
1531 //n_iter = nG_app % k_fuse;
1532 n_left = 1;
1533
1534 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1535 {
1536 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1537 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1538 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1539 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1540 a1 = buff_A + (g - 1)*cs_A;
1541 a2 = buff_A + (g )*cs_A;
1542 a3 = buff_A + (g + 1)*cs_A;
1543 a4 = buff_A + (g + 2)*cs_A;
1544
1545 gamma23_k1 = g23_k1->real;
1546 sigma23_k1 = g23_k1->imag;
1547 gamma34_k1 = g34_k1->real;
1548 sigma34_k1 = g34_k1->imag;
1549 gamma12_k2 = g12_k2->real;
1550 sigma12_k2 = g12_k2->imag;
1551 gamma23_k2 = g23_k2->real;
1552 sigma23_k2 = g23_k2->imag;
1553
1560
1561 if ( has_ident )
1562 {
1563 // Apply to pairs of columns as needed.
1564
1565 if ( !is_ident23_k1 )
1567 &gamma23_k1,
1568 &sigma23_k1,
1569 a2, 1,
1570 a3, 1 );
1571
1572 if ( !is_ident34_k1 )
1574 &gamma34_k1,
1575 &sigma34_k1,
1576 a3, 1,
1577 a4, 1 );
1578
1579 if ( !is_ident12_k2 )
1581 &gamma12_k2,
1582 &sigma12_k2,
1583 a1, 1,
1584 a2, 1 );
1585
1586 if ( !is_ident23_k2 )
1588 &gamma23_k2,
1589 &sigma23_k2,
1590 a2, 1,
1591 a3, 1 );
1592 }
1593 else
1594 {
1595 // Apply to all four columns.
1596
1598 &gamma23_k1,
1599 &sigma23_k1,
1600 &gamma34_k1,
1601 &sigma34_k1,
1602 &gamma12_k2,
1603 &sigma12_k2,
1604 &gamma23_k2,
1605 &sigma23_k2,
1606 a1, 1,
1607 a2, 1,
1608 a3, 1,
1609 a4, 1 );
1610 }
1611 }
1612
1613 if ( n_left == 1 )
1614 {
1615 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1616 a3 = buff_A + (g + 1)*cs_A;
1617 a4 = buff_A + (g + 2)*cs_A;
1618
1619 gamma34_k1 = g34_k1->real;
1620 sigma34_k1 = g34_k1->imag;
1621
1623
1624 if ( !is_ident34_k1 )
1626 &gamma34_k1,
1627 &sigma34_k1,
1628 a3, 1,
1629 a4, 1 );
1630 }
1631 }
1632
1633 // Pipeline stage
1634
1635 for ( ; j < nG - 1; j += n_fuse )
1636 {
1637 nG_app = k_G;
1638 n_iter = nG_app / k_fuse;
1639 n_left = nG_app % k_fuse;
1640
1641 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1642 {
1643 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1644 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1645 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1646 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1647 a1 = buff_A + (g - 1)*cs_A;
1648 a2 = buff_A + (g )*cs_A;
1649 a3 = buff_A + (g + 1)*cs_A;
1650 a4 = buff_A + (g + 2)*cs_A;
1651
1652 gamma23_k1 = g23_k1->real;
1653 sigma23_k1 = g23_k1->imag;
1654 gamma34_k1 = g34_k1->real;
1655 sigma34_k1 = g34_k1->imag;
1656 gamma12_k2 = g12_k2->real;
1657 sigma12_k2 = g12_k2->imag;
1658 gamma23_k2 = g23_k2->real;
1659 sigma23_k2 = g23_k2->imag;
1660
1667
1668 if ( has_ident )
1669 {
1670 // Apply to pairs of columns as needed.
1671
1672 if ( !is_ident23_k1 )
1674 &gamma23_k1,
1675 &sigma23_k1,
1676 a2, 1,
1677 a3, 1 );
1678
1679 if ( !is_ident34_k1 )
1681 &gamma34_k1,
1682 &sigma34_k1,
1683 a3, 1,
1684 a4, 1 );
1685
1686 if ( !is_ident12_k2 )
1688 &gamma12_k2,
1689 &sigma12_k2,
1690 a1, 1,
1691 a2, 1 );
1692
1693 if ( !is_ident23_k2 )
1695 &gamma23_k2,
1696 &sigma23_k2,
1697 a2, 1,
1698 a3, 1 );
1699 }
1700 else
1701 {
1702 // Apply to all four columns.
1703
1705 &gamma23_k1,
1706 &sigma23_k1,
1707 &gamma34_k1,
1708 &sigma34_k1,
1709 &gamma12_k2,
1710 &sigma12_k2,
1711 &gamma23_k2,
1712 &sigma23_k2,
1713 a1, 1,
1714 a2, 1,
1715 a3, 1,
1716 a4, 1 );
1717 }
1718 }
1719
1720 if ( n_left == 1 )
1721 {
1722 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1723 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1724 a2 = buff_A + (g )*cs_A;
1725 a3 = buff_A + (g + 1)*cs_A;
1726 a4 = buff_A + (g + 2)*cs_A;
1727
1728 gamma23_k1 = g23_k1->real;
1729 sigma23_k1 = g23_k1->imag;
1730 gamma34_k1 = g34_k1->real;
1731 sigma34_k1 = g34_k1->imag;
1732
1735
1736 if ( !is_ident23_k1 && is_ident34_k1 )
1737 {
1739 &gamma23_k1,
1740 &sigma23_k1,
1741 a2, 1,
1742 a3, 1 );
1743 }
1744 else if ( is_ident23_k1 && !is_ident34_k1 )
1745 {
1747 &gamma34_k1,
1748 &sigma34_k1,
1749 a3, 1,
1750 a4, 1 );
1751 }
1752 else
1753 {
1755 &gamma23_k1,
1756 &sigma23_k1,
1757 &gamma34_k1,
1758 &sigma34_k1,
1759 a2, 1,
1760 a3, 1,
1761 a4, 1 );
1762 }
1763 }
1764 }
1765
1766 // Shutdown stage
1767
1768 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1769 {
1770 g = nG - 1;
1771 k = j;
1772
1773 //n_left = 1;
1774 //if ( n_left == 1 )
1775 {
1776 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1777 a2 = buff_A + (g )*cs_A;
1778 a3 = buff_A + (g + 1)*cs_A;
1779
1780 gamma23_k1 = g23_k1->real;
1781 sigma23_k1 = g23_k1->imag;
1782
1784
1785 if ( !is_ident23_k1 )
1787 &gamma23_k1,
1788 &sigma23_k1,
1789 a2, 1,
1790 a3, 1 );
1791 ++k;
1792 --g;
1793 }
1794
1795 nG_app = k_minus_1 - j;
1796 n_iter = nG_app / k_fuse;
1797 n_left = nG_app % k_fuse;
1798
1799 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1800 {
1801 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1802 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1803 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1804 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1805 a1 = buff_A + (g - 1)*cs_A;
1806 a2 = buff_A + (g )*cs_A;
1807 a3 = buff_A + (g + 1)*cs_A;
1808 a4 = buff_A + (g + 2)*cs_A;
1809
1810 gamma23_k1 = g23_k1->real;
1811 sigma23_k1 = g23_k1->imag;
1812 gamma34_k1 = g34_k1->real;
1813 sigma34_k1 = g34_k1->imag;
1814 gamma12_k2 = g12_k2->real;
1815 sigma12_k2 = g12_k2->imag;
1816 gamma23_k2 = g23_k2->real;
1817 sigma23_k2 = g23_k2->imag;
1818
1825
1826 if ( has_ident )
1827 {
1828 // Apply to pairs of columns as needed.
1829
1830 if ( !is_ident23_k1 )
1832 &gamma23_k1,
1833 &sigma23_k1,
1834 a2, 1,
1835 a3, 1 );
1836
1837 if ( !is_ident34_k1 )
1839 &gamma34_k1,
1840 &sigma34_k1,
1841 a3, 1,
1842 a4, 1 );
1843
1844 if ( !is_ident12_k2 )
1846 &gamma12_k2,
1847 &sigma12_k2,
1848 a1, 1,
1849 a2, 1 );
1850
1851 if ( !is_ident23_k2 )
1853 &gamma23_k2,
1854 &sigma23_k2,
1855 a2, 1,
1856 a3, 1 );
1857 }
1858 else
1859 {
1860 // Apply to all four columns.
1861
1863 &gamma23_k1,
1864 &sigma23_k1,
1865 &gamma34_k1,
1866 &sigma34_k1,
1867 &gamma12_k2,
1868 &sigma12_k2,
1869 &gamma23_k2,
1870 &sigma23_k2,
1871 a1, 1,
1872 a2, 1,
1873 a3, 1,
1874 a4, 1 );
1875 }
1876 }
1877
1878 if ( n_left == 1 )
1879 {
1880 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1881 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1882 a2 = buff_A + (g )*cs_A;
1883 a3 = buff_A + (g + 1)*cs_A;
1884 a4 = buff_A + (g + 2)*cs_A;
1885
1886 gamma23_k1 = g23_k1->real;
1887 sigma23_k1 = g23_k1->imag;
1888 gamma34_k1 = g34_k1->real;
1889 sigma34_k1 = g34_k1->imag;
1890
1893
1894 if ( !is_ident23_k1 && is_ident34_k1 )
1895 {
1897 &gamma23_k1,
1898 &sigma23_k1,
1899 a2, 1,
1900 a3, 1 );
1901 }
1902 else if ( is_ident23_k1 && !is_ident34_k1 )
1903 {
1905 &gamma34_k1,
1906 &sigma34_k1,
1907 a3, 1,
1908 a4, 1 );
1909 }
1910 else
1911 {
1913 &gamma23_k1,
1914 &sigma23_k1,
1915 &gamma34_k1,
1916 &sigma34_k1,
1917 a2, 1,
1918 a3, 1,
1919 a4, 1 );
1920 }
1921 }
1922 }
1923
1924 return FLA_SUCCESS;
1925}
FLA_Error FLA_Apply_G_rf_asz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3().