libflame revision_anchor
Functions | Variables
FLASH_Queue.c File Reference

(r)

Functions

void FLASH_Queue_begin (void)
 
void FLASH_Queue_end (void)
 
unsigned int FLASH_Queue_stack_depth (void)
 
FLA_Error FLASH_Queue_enable (void)
 
FLA_Error FLASH_Queue_disable (void)
 
FLA_Bool FLASH_Queue_get_enabled (void)
 
void FLASH_Queue_set_num_threads (unsigned int n_threads)
 
unsigned int FLASH_Queue_get_num_threads (void)
 
void FLASH_Queue_init (void)
 
void FLASH_Queue_finalize (void)
 
unsigned int FLASH_Queue_get_num_tasks (void)
 
void FLASH_Queue_set_verbose_output (FLASH_Verbose verbose)
 
FLASH_Verbose FLASH_Queue_get_verbose_output (void)
 
void FLASH_Queue_set_sorting (FLA_Bool sorting)
 
FLA_Bool FLASH_Queue_get_sorting (void)
 
void FLASH_Queue_set_caching (FLA_Bool caching)
 
FLA_Bool FLASH_Queue_get_caching (void)
 
void FLASH_Queue_set_work_stealing (FLA_Bool work_stealing)
 
FLA_Bool FLASH_Queue_get_work_stealing (void)
 
void FLASH_Queue_set_data_affinity (FLASH_Data_aff data_affinity)
 
FLASH_Data_aff FLASH_Queue_get_data_affinity (void)
 
double FLASH_Queue_get_total_time (void)
 
double FLASH_Queue_get_parallel_time (void)
 
void FLASH_Queue_set_parallel_time (double dtime)
 
void FLASH_Queue_set_block_size (dim_t size)
 
dim_t FLASH_Queue_get_block_size (void)
 
void FLASH_Queue_set_cache_size (dim_t size)
 
dim_t FLASH_Queue_get_cache_size (void)
 
void FLASH_Queue_set_cache_line_size (dim_t size)
 
dim_t FLASH_Queue_get_cache_line_size (void)
 
void FLASH_Queue_set_cores_per_cache (int cores)
 
int FLASH_Queue_get_cores_per_cache (void)
 
void FLASH_Queue_set_cores_per_queue (int cores)
 
int FLASH_Queue_get_cores_per_queue (void)
 
void FLASH_Queue_reset (void)
 
FLASH_Task * FLASH_Queue_get_head_task (void)
 
FLASH_Task * FLASH_Queue_get_tail_task (void)
 
void FLASH_Queue_push (void *func, void *cntl, char *name, FLA_Bool enabled_gpu, int n_int_args, int n_fla_args, int n_input_args, int n_output_args,...)
 
void FLASH_Queue_push_input (FLA_Obj obj, FLASH_Task *t)
 
void FLASH_Queue_push_output (FLA_Obj obj, FLASH_Task *t)
 
FLASH_Task * FLASH_Task_alloc (void *func, void *cntl, char *name, FLA_Bool enabled_gpu, int n_int_args, int n_fla_args, int n_input_args, int n_output_args)
 
void FLASH_Task_free (FLASH_Task *t)
 
void FLASH_Queue_exec_task (FLASH_Task *t)
 
void FLASH_Queue_verbose_output (void)
 

Variables

FLASH_Queue _tq
 

Function Documentation

◆ FLASH_Queue_begin()

void FLASH_Queue_begin ( void  )
65{
66#ifdef FLA_ENABLE_SUPERMATRIX
67 if ( flash_queue_stack == 0 )
68 {
69 // Save the starting time for the total execution time.
70 flash_queue_total_time = FLA_Clock();
71 }
72#endif
73
74 // Push onto the stack.
75 flash_queue_stack++;
76
77 return;
78}
double FLA_Clock(void)
Definition FLA_Clock.c:20

References FLA_Clock().

Referenced by FLASH_Apply_CAQ_UT_inc(), FLASH_Apply_Q2_UT(), FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_QUD_UT_inc(), FLASH_CAQR_UT_inc_noopt(), FLASH_Chol(), FLASH_Copy(), FLASH_Copyr(), FLASH_Eig_gest(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LQ_UT(), FLASH_LU_incpiv_noopt(), FLASH_LU_incpiv_opt1(), FLASH_LU_nopiv(), FLASH_LU_piv(), FLASH_Lyap(), FLASH_QR2_UT(), FLASH_QR_UT(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), FLASH_Ttmm(), and FLASH_UDdate_UT_inc().

◆ FLASH_Queue_disable()

FLA_Error FLASH_Queue_disable ( void  )
150{
151#ifdef FLA_ENABLE_SUPERMATRIX
152 if ( flash_queue_stack == 0 )
153 {
154 // Disable if not begin parallel region yet.
155 flash_queue_enabled = FALSE;
156 return FLA_SUCCESS;
157 }
158 else
159 {
160 // Cannot change status during parallel region.
161 return FLA_FAILURE;
162 }
163#else
164 // Allow disabling enqueuing even when SuperMatrix is not configured.
165 flash_queue_enabled = FALSE;
166 return FLA_SUCCESS;
167#endif
168}
int i
Definition bl1_axmyv2.c:145

Referenced by FLASH_Apply_pivots(), FLASH_Axpy(), FLASH_Axpyt(), FLASH_Copyt(), FLASH_FS_incpiv(), FLASH_Gemv(), FLASH_Scal(), FLASH_Scalr(), and FLASH_Trsv().

◆ FLASH_Queue_enable()

FLA_Error FLASH_Queue_enable ( void  )
123{
124#ifdef FLA_ENABLE_SUPERMATRIX
125 if ( flash_queue_stack == 0 )
126 {
127 // Enable if not begin parallel region yet.
128 flash_queue_enabled = TRUE;
129 return FLA_SUCCESS;
130 }
131 else
132 {
133 // Cannot change status during parallel region.
134 return FLA_FAILURE;
135 }
136#else
137 // Raise an exception when SuperMatrix is not configured.
139 return FLA_FAILURE;
140#endif
141}

Referenced by FLASH_Apply_pivots(), FLASH_Axpy(), FLASH_Axpyt(), FLASH_Copyt(), FLASH_FS_incpiv(), FLASH_Gemv(), FLASH_Scal(), FLASH_Scalr(), and FLASH_Trsv().

◆ FLASH_Queue_end()

void FLASH_Queue_end ( void  )
87{
88 // Pop off the stack.
89 flash_queue_stack--;
90
91#ifdef FLA_ENABLE_SUPERMATRIX
92 if ( flash_queue_stack == 0 )
93 {
94 // Execute tasks if encounter the outermost parallel region.
95 FLASH_Queue_exec();
96
97 // Find the total execution time.
98 flash_queue_total_time = FLA_Clock() - flash_queue_total_time;
99 }
100#endif
101
102 return;
103}
void FLASH_Queue_exec(void)
Definition FLASH_Queue_exec.c:2756

References FLA_Clock(), and FLASH_Queue_exec().

Referenced by FLASH_Apply_CAQ_UT_inc(), FLASH_Apply_Q2_UT(), FLASH_Apply_Q_UT(), FLASH_Apply_Q_UT_inc(), FLASH_Apply_QUD_UT_inc(), FLASH_CAQR_UT_inc_noopt(), FLASH_Chol(), FLASH_Copy(), FLASH_Copyr(), FLASH_Eig_gest(), FLASH_Gemm(), FLASH_Hemm(), FLASH_Her2k(), FLASH_Herk(), FLASH_LQ_UT(), FLASH_LU_incpiv_noopt(), FLASH_LU_incpiv_opt1(), FLASH_LU_nopiv(), FLASH_LU_piv(), FLASH_Lyap(), FLASH_QR2_UT(), FLASH_QR_UT(), FLASH_QR_UT_inc_noopt(), FLASH_QR_UT_inc_opt1(), FLASH_SPDinv(), FLASH_Sylv(), FLASH_Symm(), FLASH_Syr2k(), FLASH_Syrk(), FLASH_Trinv(), FLASH_Trmm(), FLASH_Trsm(), FLASH_Ttmm(), and FLASH_UDdate_UT_inc().

◆ FLASH_Queue_exec_task()

void FLASH_Queue_exec_task ( FLASH_Task * t )
1147{
1148 // Define local function pointer types.
1149
1150 // LAPACK-level
1153 typedef FLA_Error(*flash_lu_piv_p)(FLA_Obj A, FLA_Obj p, fla_lu_t* cntl);
1158 typedef FLA_Error(*flash_lu_nopiv_p)(FLA_Obj A, fla_lu_t* cntl);
1160 typedef FLA_Error(*flash_ttmm_p)(FLA_Uplo uplo, FLA_Obj A, fla_ttmm_t* cntl);
1161 typedef FLA_Error(*flash_chol_p)(FLA_Uplo uplo, FLA_Obj A, fla_chol_t* cntl);
1165 typedef FLA_Error(*flash_qrut_p)(FLA_Obj A, FLA_Obj T, fla_qrut_t* cntl);
1176
1177 // Level-3 BLAS
1187
1188 // Level-2 BLAS
1191
1192 // Level-1 BLAS
1195 typedef FLA_Error(*flash_copy_p)(FLA_Obj A, FLA_Obj B, fla_copy_t* cntl);
1197 typedef FLA_Error(*flash_copyr_p)(FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl);
1200
1201 // Base
1202 typedef FLA_Error(*flash_obj_create_buffer_p)(dim_t rs, dim_t cs, FLA_Obj A, void* cntl);
1203 typedef FLA_Error(*flash_obj_free_buffer_p)(FLA_Obj A, void* cntl);
1204
1205 // Only execute task if it is not NULL.
1206 if ( t == NULL )
1207 return;
1208
1209 // Now "switch" between the various possible task functions.
1210
1211 // FLA_LU_piv_macro
1212 if ( t->func == (void *) FLA_LU_piv_macro_task )
1213 {
1215 func = (flash_lu_piv_macro_p) t->func;
1216
1217 func( t->output_arg[0],
1218 t->output_arg[1],
1219 ( fla_lu_t* ) t->cntl );
1220 }
1221 // FLA_Apply_pivots_macro
1222 else if ( t->func == (void *) FLA_Apply_pivots_macro_task )
1223 {
1225 func = (flash_apply_pivots_macro_p) t->func;
1226
1227 func( ( FLA_Side ) t->int_arg[0],
1228 ( FLA_Trans ) t->int_arg[1],
1229 t->input_arg[0],
1230 t->output_arg[0],
1231 ( fla_appiv_t* ) t->cntl );
1232 }
1233 // FLA_LU_piv
1234 else if ( t->func == (void *) FLA_LU_piv_task )
1235 {
1236 flash_lu_piv_p func;
1237 func = (flash_lu_piv_p) t->func;
1238
1239 func( t->output_arg[0],
1240 t->fla_arg[0],
1241 ( fla_lu_t* ) t->cntl );
1242 }
1243 // FLA_LU_piv_copy
1244 else if ( t->func == (void *) FLA_LU_piv_copy_task )
1245 {
1247 func = (flash_lu_piv_copy_p) t->func;
1248
1249 func( t->output_arg[0],
1250 t->fla_arg[0],
1251 t->output_arg[1],
1252 ( fla_lu_t* ) t->cntl );
1253 }
1254 // FLA_Trsm_piv
1255 else if ( t->func == (void *) FLA_Trsm_piv_task )
1256 {
1257 flash_trsm_piv_p func;
1258 func = (flash_trsm_piv_p) t->func;
1259
1260 func( t->input_arg[0],
1261 t->output_arg[0],
1262 t->fla_arg[0],
1263 ( fla_trsm_t* ) t->cntl );
1264 }
1265 // FLA_SA_LU
1266 else if ( t->func == (void *) FLA_SA_LU_task )
1267 {
1268 flash_sa_lu_p func;
1269 func = (flash_sa_lu_p) t->func;
1270
1271 func( t->output_arg[1],
1272 t->output_arg[0],
1273 t->fla_arg[0],
1274 t->fla_arg[1],
1275 t->int_arg[0],
1276 ( fla_lu_t* ) t->cntl );
1277 }
1278 // FLA_SA_FS
1279 else if ( t->func == (void *) FLA_SA_FS_task )
1280 {
1281 flash_sa_fs_p func;
1282 func = (flash_sa_fs_p) t->func;
1283
1284 func( t->fla_arg[0],
1285 t->input_arg[0],
1286 t->fla_arg[1],
1287 t->output_arg[1],
1288 t->output_arg[0],
1289 t->int_arg[0],
1290 ( fla_gemm_t* ) t->cntl );
1291 }
1292 // FLA_LU_nopiv
1293 else if ( t->func == (void *) FLA_LU_nopiv_task )
1294 {
1295 flash_lu_nopiv_p func;
1296 func = (flash_lu_nopiv_p) t->func;
1297
1298 func( t->output_arg[0],
1299 ( fla_lu_t* ) t->cntl );
1300 }
1301 // FLA_Trinv
1302 else if ( t->func == (void *) FLA_Trinv_task )
1303 {
1304 flash_trinv_p func;
1305 func = (flash_trinv_p) t->func;
1306
1307 func( ( FLA_Uplo ) t->int_arg[0],
1308 ( FLA_Diag ) t->int_arg[1],
1309 t->output_arg[0],
1310 ( fla_trinv_t* ) t->cntl );
1311 }
1312 // FLA_Ttmm
1313 else if ( t->func == (void *) FLA_Ttmm_task )
1314 {
1315 flash_ttmm_p func;
1316 func = (flash_ttmm_p) t->func;
1317
1318 func( ( FLA_Uplo ) t->int_arg[0],
1319 t->output_arg[0],
1320 ( fla_ttmm_t* ) t->cntl );
1321 }
1322 // FLA_Chol
1323 else if ( t->func == (void *) FLA_Chol_task )
1324 {
1325 flash_chol_p func;
1326 func = (flash_chol_p) t->func;
1327
1328 func( ( FLA_Uplo ) t->int_arg[0],
1329 t->output_arg[0],
1330 ( fla_chol_t* ) t->cntl );
1331 }
1332 // FLA_Sylv
1333 else if ( t->func == (void *) FLA_Sylv_task )
1334 {
1335 flash_sylv_p func;
1336 func = (flash_sylv_p) t->func;
1337
1338 func( ( FLA_Trans ) t->int_arg[0],
1339 ( FLA_Trans ) t->int_arg[1],
1340 t->fla_arg[0],
1341 t->input_arg[0],
1342 t->input_arg[1],
1343 t->output_arg[0],
1344 t->fla_arg[1],
1345 ( fla_sylv_t* ) t->cntl );
1346 }
1347 // FLA_Lyap
1348 else if ( t->func == (void *) FLA_Lyap_task )
1349 {
1350 flash_lyap_p func;
1351 func = (flash_lyap_p) t->func;
1352
1353 func( ( FLA_Trans ) t->int_arg[0],
1354 t->fla_arg[0],
1355 t->input_arg[0],
1356 t->output_arg[0],
1357 t->fla_arg[1],
1358 ( fla_lyap_t* ) t->cntl );
1359 }
1360 // FLA_QR_UT_macro
1361 else if ( t->func == (void *) FLA_QR_UT_macro_task )
1362 {
1363 flash_qrut_macro_p func;
1364 func = (flash_qrut_macro_p) t->func;
1365
1366 func( t->output_arg[0],
1367 t->output_arg[1],
1368 ( fla_qrut_t* ) t->cntl );
1369 }
1370 // FLA_QR_UT
1371 else if ( t->func == (void *) FLA_QR_UT_task )
1372 {
1373 flash_qrut_p func;
1374 func = (flash_qrut_p) t->func;
1375
1376 func( t->output_arg[0],
1377 t->fla_arg[0],
1378 ( fla_qrut_t* ) t->cntl );
1379 }
1380 // FLA_QR_UT_copy
1381 else if ( t->func == (void *) FLA_QR_UT_copy_task )
1382 {
1383 flash_qrutc_p func;
1384 func = (flash_qrutc_p) t->func;
1385
1386 func( t->output_arg[0],
1387 t->fla_arg[0],
1388 t->output_arg[1],
1389 ( fla_qrut_t* ) t->cntl );
1390 }
1391 // FLA_QR2_UT
1392 else if ( t->func == (void *) FLA_QR2_UT_task )
1393 {
1394 flash_qr2ut_p func;
1395 func = (flash_qr2ut_p) t->func;
1396
1397 func( t->output_arg[1],
1398 t->output_arg[0],
1399 t->fla_arg[0],
1400 ( fla_qr2ut_t* ) t->cntl );
1401 }
1402 // FLA_LQ_UT_macro
1403 else if ( t->func == (void *) FLA_LQ_UT_macro_task )
1404 {
1405 flash_lqut_macro_p func;
1406 func = (flash_lqut_macro_p) t->func;
1407
1408 func( t->output_arg[0],
1409 t->output_arg[1],
1410 ( fla_lqut_t* ) t->cntl );
1411 }
1412 // FLA_CAQR2_UT
1413 else if ( t->func == (void *) FLA_CAQR2_UT_task )
1414 {
1415 flash_caqr2ut_p func;
1416 func = (flash_caqr2ut_p) t->func;
1417
1418 func( t->output_arg[1],
1419 t->output_arg[0],
1420 t->fla_arg[0],
1421 ( fla_caqr2ut_t* ) t->cntl );
1422 }
1423 // FLA_UDdate_UT
1424 else if ( t->func == (void *) FLA_UDdate_UT_task )
1425 {
1426 flash_uddateut_p func;
1427 func = (flash_uddateut_p) t->func;
1428
1429 func( t->output_arg[0],
1430 t->output_arg[1],
1431 t->output_arg[2],
1432 t->output_arg[3],
1433 ( fla_uddateut_t* ) t->cntl );
1434 }
1435 // FLA_Apply_Q_UT
1436 else if ( t->func == (void *) FLA_Apply_Q_UT_task )
1437 {
1438 flash_apqut_p func;
1439 func = (flash_apqut_p) t->func;
1440
1441 func( ( FLA_Side ) t->int_arg[0],
1442 ( FLA_Trans ) t->int_arg[1],
1443 ( FLA_Direct ) t->int_arg[2],
1444 ( FLA_Store ) t->int_arg[3],
1445 t->input_arg[0],
1446 t->fla_arg[0],
1447 t->output_arg[1],
1448 t->output_arg[0],
1449 ( fla_apqut_t* ) t->cntl );
1450 }
1451 // FLA_Apply_Q2_UT
1452 else if ( t->func == (void *) FLA_Apply_Q2_UT_task )
1453 {
1454 flash_apq2ut_p func;
1455 func = (flash_apq2ut_p) t->func;
1456
1457 func( ( FLA_Side ) t->int_arg[0],
1458 ( FLA_Trans ) t->int_arg[1],
1459 ( FLA_Direct ) t->int_arg[2],
1460 ( FLA_Store ) t->int_arg[3],
1461 t->input_arg[0],
1462 t->fla_arg[0],
1463 t->output_arg[2],
1464 t->output_arg[1],
1465 t->output_arg[0],
1466 ( fla_apq2ut_t* ) t->cntl );
1467 }
1468 // FLA_Apply_CAQ2_UT
1469 else if ( t->func == (void *) FLA_Apply_CAQ2_UT_task )
1470 {
1471 flash_apcaq2ut_p func;
1472 func = (flash_apcaq2ut_p) t->func;
1473
1474 func( ( FLA_Side ) t->int_arg[0],
1475 ( FLA_Trans ) t->int_arg[1],
1476 ( FLA_Direct ) t->int_arg[2],
1477 ( FLA_Store ) t->int_arg[3],
1478 t->input_arg[0],
1479 t->fla_arg[0],
1480 t->output_arg[2],
1481 t->output_arg[1],
1482 t->output_arg[0],
1483 ( fla_apcaq2ut_t* ) t->cntl );
1484 }
1485 // FLA_Apply_QUD_UT
1486 else if ( t->func == (void *) FLA_Apply_QUD_UT_task )
1487 {
1488 flash_apqudut_p func;
1489 func = (flash_apqudut_p) t->func;
1490
1491 func( ( FLA_Side ) t->int_arg[0],
1492 ( FLA_Trans ) t->int_arg[1],
1493 ( FLA_Direct ) t->int_arg[2],
1494 ( FLA_Store ) t->int_arg[3],
1495 t->input_arg[0],
1496 t->output_arg[0],
1497 t->output_arg[1],
1498 t->input_arg[1],
1499 t->output_arg[2],
1500 t->input_arg[2],
1501 t->output_arg[3],
1502 ( fla_apqudut_t* ) t->cntl );
1503 }
1504 // FLA_Eig_gest
1505 else if ( t->func == (void *) FLA_Eig_gest_task )
1506 {
1507 flash_eig_gest_p func;
1508 func = (flash_eig_gest_p) t->func;
1509
1510 func( ( FLA_Inv ) t->int_arg[0],
1511 ( FLA_Uplo ) t->int_arg[1],
1512 t->output_arg[1],
1513 t->output_arg[0],
1514 t->input_arg[0],
1515 ( fla_eig_gest_t* ) t->cntl );
1516 }
1517 // FLA_Gemm
1518 else if ( t->func == (void *) FLA_Gemm_task )
1519 {
1520 flash_gemm_p func;
1521 func = (flash_gemm_p) t->func;
1522
1523 func( ( FLA_Trans ) t->int_arg[0],
1524 ( FLA_Trans ) t->int_arg[1],
1525 t->fla_arg[0],
1526 t->input_arg[0],
1527 t->input_arg[1],
1528 t->fla_arg[1],
1529 t->output_arg[0],
1530 ( fla_gemm_t* ) t->cntl );
1531 }
1532 // FLA_Hemm
1533 else if ( t->func == (void *) FLA_Hemm_task )
1534 {
1535 flash_hemm_p func;
1536 func = (flash_hemm_p) t->func;
1537
1538 func( ( FLA_Side ) t->int_arg[0],
1539 ( FLA_Uplo ) t->int_arg[1],
1540 t->fla_arg[0],
1541 t->input_arg[0],
1542 t->input_arg[1],
1543 t->fla_arg[1],
1544 t->output_arg[0],
1545 ( fla_hemm_t* ) t->cntl );
1546 }
1547 // FLA_Herk
1548 else if ( t->func == (void *) FLA_Herk_task )
1549 {
1550 flash_herk_p func;
1551 func = (flash_herk_p) t->func;
1552
1553 func( ( FLA_Uplo ) t->int_arg[0],
1554 ( FLA_Trans ) t->int_arg[1],
1555 t->fla_arg[0],
1556 t->input_arg[0],
1557 t->fla_arg[1],
1558 t->output_arg[0],
1559 ( fla_herk_t* ) t->cntl );
1560 }
1561 // FLA_Her2k
1562 else if ( t->func == (void *) FLA_Her2k_task )
1563 {
1564 flash_her2k_p func;
1565 func = (flash_her2k_p) t->func;
1566
1567 func( ( FLA_Uplo ) t->int_arg[0],
1568 ( FLA_Trans ) t->int_arg[1],
1569 t->fla_arg[0],
1570 t->input_arg[0],
1571 t->input_arg[1],
1572 t->fla_arg[1],
1573 t->output_arg[0],
1574 ( fla_her2k_t* ) t->cntl );
1575 }
1576 // FLA_Symm
1577 else if ( t->func == (void *) FLA_Symm_task )
1578 {
1579 flash_symm_p func;
1580 func = (flash_symm_p) t->func;
1581
1582 func( ( FLA_Side ) t->int_arg[0],
1583 ( FLA_Uplo ) t->int_arg[1],
1584 t->fla_arg[0],
1585 t->input_arg[0],
1586 t->input_arg[1],
1587 t->fla_arg[1],
1588 t->output_arg[0],
1589 ( fla_symm_t* ) t->cntl );
1590 }
1591 // FLA_Syrk
1592 else if ( t->func == (void *) FLA_Syrk_task )
1593 {
1594 flash_syrk_p func;
1595 func = (flash_syrk_p) t->func;
1596
1597 func( ( FLA_Uplo ) t->int_arg[0],
1598 ( FLA_Trans ) t->int_arg[1],
1599 t->fla_arg[0],
1600 t->input_arg[0],
1601 t->fla_arg[1],
1602 t->output_arg[0],
1603 ( fla_syrk_t* ) t->cntl );
1604 }
1605 // FLA_Syr2k
1606 else if ( t->func == (void *) FLA_Syr2k_task )
1607 {
1608 flash_syr2k_p func;
1609 func = (flash_syr2k_p) t->func;
1610
1611 func( ( FLA_Uplo ) t->int_arg[0],
1612 ( FLA_Trans ) t->int_arg[1],
1613 t->fla_arg[0],
1614 t->input_arg[0],
1615 t->input_arg[1],
1616 t->fla_arg[1],
1617 t->output_arg[0],
1618 ( fla_syr2k_t* ) t->cntl );
1619 }
1620 // FLA_Trmm
1621 else if ( t->func == (void *) FLA_Trmm_task )
1622 {
1623 flash_trmm_p func;
1624 func = (flash_trmm_p) t->func;
1625
1626 func( ( FLA_Side ) t->int_arg[0],
1627 ( FLA_Uplo ) t->int_arg[1],
1628 ( FLA_Trans ) t->int_arg[2],
1629 ( FLA_Diag ) t->int_arg[3],
1630 t->fla_arg[0],
1631 t->input_arg[0],
1632 t->output_arg[0],
1633 ( fla_trmm_t* ) t->cntl );
1634 }
1635 // FLA_Trsm
1636 else if ( t->func == (void *) FLA_Trsm_task )
1637 {
1638 flash_trsm_p func;
1639 func = (flash_trsm_p) t->func;
1640
1641 func( ( FLA_Side ) t->int_arg[0],
1642 ( FLA_Uplo ) t->int_arg[1],
1643 ( FLA_Trans ) t->int_arg[2],
1644 ( FLA_Diag ) t->int_arg[3],
1645 t->fla_arg[0],
1646 t->input_arg[0],
1647 t->output_arg[0],
1648 ( fla_trsm_t* ) t->cntl );
1649 }
1650 // FLA_Gemv
1651 else if ( t->func == (void *) FLA_Gemv_task )
1652 {
1653 flash_gemv_p func;
1654 func = (flash_gemv_p) t->func;
1655
1656 func( ( FLA_Trans ) t->int_arg[0],
1657 t->fla_arg[0],
1658 t->input_arg[0],
1659 t->input_arg[1],
1660 t->fla_arg[1],
1661 t->output_arg[0],
1662 ( fla_gemv_t* ) t->cntl );
1663 }
1664 // FLA_Trsv
1665 else if ( t->func == (void *) FLA_Trsv_task )
1666 {
1667 flash_trsv_p func;
1668 func = (flash_trsv_p) t->func;
1669
1670 func( ( FLA_Uplo ) t->int_arg[0],
1671 ( FLA_Trans ) t->int_arg[1],
1672 ( FLA_Diag ) t->int_arg[2],
1673 t->input_arg[0],
1674 t->output_arg[0],
1675 ( fla_trsv_t* ) t->cntl );
1676 }
1677 // FLA_Axpy
1678 else if ( t->func == (void *) FLA_Axpy_task )
1679 {
1680 flash_axpy_p func;
1681 func = (flash_axpy_p) t->func;
1682
1683 func( t->fla_arg[0],
1684 t->input_arg[0],
1685 t->output_arg[0],
1686 ( fla_axpy_t* ) t->cntl );
1687 }
1688 // FLA_Axpyt
1689 else if ( t->func == (void *) FLA_Axpyt_task )
1690 {
1691 flash_axpyt_p func;
1692 func = (flash_axpyt_p) t->func;
1693
1694 func( ( FLA_Trans ) t->int_arg[0],
1695 t->fla_arg[0],
1696 t->input_arg[0],
1697 t->output_arg[0],
1698 ( fla_axpyt_t* ) t->cntl );
1699 }
1700 // FLA_Copy
1701 else if ( t->func == (void *) FLA_Copy_task )
1702 {
1703 flash_copy_p func;
1704 func = (flash_copy_p) t->func;
1705
1706 func( t->input_arg[0],
1707 t->output_arg[0],
1708 ( fla_copy_t* ) t->cntl );
1709 }
1710 // FLA_Copyt
1711 else if ( t->func == (void *) FLA_Copyt_task )
1712 {
1713 flash_copyt_p func;
1714 func = (flash_copyt_p) t->func;
1715
1716 func( ( FLA_Trans ) t->int_arg[0],
1717 t->input_arg[0],
1718 t->output_arg[0],
1719 ( fla_copyt_t* ) t->cntl );
1720 }
1721 // FLA_Copyr
1722 else if ( t->func == (void *) FLA_Copyr_task )
1723 {
1724 flash_copyr_p func;
1725 func = (flash_copyr_p) t->func;
1726
1727 func( ( FLA_Uplo ) t->int_arg[0],
1728 t->input_arg[0],
1729 t->output_arg[0],
1730 ( fla_copyr_t* ) t->cntl );
1731 }
1732 // FLA_Scal
1733 else if ( t->func == (void *) FLA_Scal_task )
1734 {
1735 flash_scal_p func;
1736 func = (flash_scal_p) t->func;
1737
1738 func( t->fla_arg[0],
1739 t->output_arg[0],
1740 ( fla_scal_t* ) t->cntl );
1741 }
1742 // FLA_Scalr
1743 else if ( t->func == (void *) FLA_Scalr_task )
1744 {
1745 flash_scalr_p func;
1746 func = (flash_scalr_p) t->func;
1747
1748 func( ( FLA_Uplo ) t->int_arg[0],
1749 t->fla_arg[0],
1750 t->output_arg[0],
1751 ( fla_scalr_t* ) t->cntl );
1752 }
1753 // FLA_Obj_create_buffer
1754 else if ( t->func == (void *) FLA_Obj_create_buffer_task )
1755 {
1757 func = (flash_obj_create_buffer_p) t->func;
1758
1759 func( ( dim_t ) t->int_arg[0],
1760 ( dim_t ) t->int_arg[1],
1761 t->output_arg[0],
1762 t->cntl );
1763 }
1764 // FLA_Obj_free_buffer
1765 else if ( t->func == (void *) FLA_Obj_free_buffer_task )
1766 {
1768 func = (flash_obj_free_buffer_p) t->func;
1769
1770 func( t->output_arg[0],
1771 t->cntl );
1772 }
1773 else
1774 {
1776 }
1777
1778 return;
1779}
FLA_Error FLA_Scal_task(FLA_Obj alpha, FLA_Obj A, fla_scal_t *cntl)
Definition FLA_Scal_task.c:13
FLA_Error FLA_Scalr_task(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, fla_scalr_t *cntl)
Definition FLA_Scalr_task.c:13
FLA_Error FLA_Copy_task(FLA_Obj A, FLA_Obj B, fla_copy_t *cntl)
Definition FLA_Copy_task.c:13
FLA_Error FLA_Copyt_task(FLA_Trans trans, FLA_Obj A, FLA_Obj B, fla_copyt_t *cntl)
Definition FLA_Copyt_task.c:13
FLA_Error FLA_Axpy_task(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t *cntl)
Definition FLA_Axpy_task.c:13
FLA_Error FLA_Copyr_task(FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, fla_copyr_t *cntl)
Definition FLA_Copyr_task.c:13
FLA_Error FLA_Axpyt_task(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t *cntl)
Definition FLA_Axpyt_task.c:13
FLA_Error FLA_Gemv_task(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y, fla_gemv_t *cntl)
Definition FLA_Gemv_task.c:13
FLA_Error FLA_Trsv_task(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x, fla_trsv_t *cntl)
Definition FLA_Trsv_task.c:13
FLA_Error FLA_Syrk_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t *cntl)
Definition FLA_Syrk_task.c:13
FLA_Error FLA_Herk_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t *cntl)
Definition FLA_Herk_task.c:13
FLA_Error FLA_Trmm_task(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t *cntl)
Definition FLA_Trmm_task.c:13
FLA_Error FLA_Hemm_task(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t *cntl)
Definition FLA_Hemm_task.c:13
FLA_Error FLA_Her2k_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_her2k_t *cntl)
Definition FLA_Her2k_task.c:13
FLA_Error FLA_Syr2k_task(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_syr2k_t *cntl)
Definition FLA_Syr2k_task.c:13
FLA_Error FLA_Symm_task(FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t *cntl)
Definition FLA_Symm_task.c:13
FLA_Error FLA_Trsm_task(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition FLA_Trsm_task.c:13
FLA_Error FLA_Gemm_task(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition FLA_Gemm_task.c:13
FLA_Error FLA_LU_piv_copy_task(FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t *cntl)
Definition FLA_LU_piv_copy_task.c:13
FLA_Error FLA_LQ_UT_macro_task(FLA_Obj A, FLA_Obj T, fla_lqut_t *cntl)
Definition FLA_LQ_UT_macro_task.c:15
FLA_Error FLA_QR_UT_task(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition FLA_QR_UT_task.c:15
FLA_Error FLA_QR_UT_copy_task(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
Definition FLA_QR_UT_copy_task.c:15
FLA_Error FLA_Lyap_task(FLA_Trans trans, FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale, fla_lyap_t *cntl)
Definition FLA_Lyap_task.c:15
FLA_Error FLA_Chol_task(FLA_Uplo uplo, FLA_Obj A, fla_chol_t *cntl)
Definition FLA_Chol_task.c:15
FLA_Error FLA_Trinv_task(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A, fla_trinv_t *cntl)
Definition FLA_Trinv_task.c:15
FLA_Error FLA_Trsm_piv_task(FLA_Obj A, FLA_Obj B, FLA_Obj p, fla_trsm_t *cntl)
Definition FLA_Trsm_piv_task.c:13
FLA_Error FLA_Apply_Q_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t *cntl)
Definition FLA_Apply_Q_UT_task.c:15
FLA_Error FLA_Apply_Q2_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apq2ut_t *cntl)
Definition FLA_Apply_Q2_UT_task.c:15
FLA_Error FLA_LU_piv_macro_task(FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
Definition FLA_LU_piv_macro_task.c:13
FLA_Error FLA_Ttmm_task(FLA_Uplo uplo, FLA_Obj A, fla_ttmm_t *cntl)
Definition FLA_Ttmm_task.c:15
FLA_Error FLA_SA_FS_task(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg, fla_gemm_t *cntl)
Definition FLA_SA_FS_task.c:13
FLA_Error FLA_QR_UT_macro_task(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition FLA_QR_UT_macro_task.c:15
FLA_Error FLA_QR2_UT_task(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl)
Definition FLA_QR2_UT_task.c:15
FLA_Error FLA_Sylv_task(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
Definition FLA_Sylv_task.c:15
FLA_Error FLA_SA_LU_task(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
Definition FLA_SA_LU_task.c:13
FLA_Error FLA_LU_nopiv_task(FLA_Obj A, fla_lu_t *cntl)
Definition FLA_LU_nopiv_task.c:15
FLA_Error FLA_Apply_pivots_macro_task(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
Definition FLA_Apply_pivots_macro_task.c:15
FLA_Error FLA_CAQR2_UT_task(FLA_Obj B, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl)
Definition FLA_CAQR2_UT_task.c:15
FLA_Error FLA_UDdate_UT_task(FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, fla_uddateut_t *cntl)
Definition FLA_UDdate_UT_task.c:15
FLA_Error FLA_LU_piv_task(FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
Definition FLA_LU_piv_task.c:15
FLA_Error FLA_Apply_CAQ2_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apcaq2ut_t *cntl)
Definition FLA_Apply_CAQ2_UT_task.c:15
FLA_Error FLA_Apply_QUD_UT_task(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj T, FLA_Obj W, FLA_Obj R, FLA_Obj U, FLA_Obj C, FLA_Obj V, FLA_Obj D, fla_apqudut_t *cntl)
Definition FLA_Apply_QUD_UT_task.c:15
FLA_Error FLA_Eig_gest_task(FLA_Inv inv, FLA_Uplo uplo, FLA_Obj A, FLA_Obj Y, FLA_Obj B, fla_eig_gest_t *cntl)
Definition FLA_Eig_gest_task.c:16
FLA_Error FLA_Obj_create_buffer_task(dim_t rs, dim_t cs, FLA_Obj obj, void *cntl)
Definition FLA_Obj_create_buffer_task.c:13
FLA_Error FLA_Obj_free_buffer_task(FLA_Obj obj, void *cntl)
Definition FLA_Obj_free_buffer_task.c:13
int FLA_Error
Definition FLA_type_defs.h:47
int FLA_Side
Definition FLA_type_defs.h:51
int FLA_Inv
Definition FLA_type_defs.h:63
int FLA_Trans
Definition FLA_type_defs.h:53
int FLA_Store
Definition FLA_type_defs.h:59
unsigned long dim_t
Definition FLA_type_defs.h:71
int FLA_Uplo
Definition FLA_type_defs.h:52
int FLA_Diag
Definition FLA_type_defs.h:55
int FLA_Direct
Definition FLA_type_defs.h:58
Definition FLA_type_defs.h:159
Definition FLA_Cntl_lapack.h:264
Definition FLA_Cntl_lapack.h:43
Definition FLA_Cntl_lapack.h:228
Definition FLA_Cntl_lapack.h:318
Definition FLA_Cntl_lapack.h:211
Definition FLA_Cntl_blas1.h:17
Definition FLA_Cntl_blas1.h:27
Definition FLA_Cntl_lapack.h:106
Definition FLA_Cntl_lapack.h:17
Definition FLA_Cntl_blas1.h:37
Definition FLA_Cntl_blas1.h:57
Definition FLA_Cntl_blas1.h:47
Definition FLA_Cntl_lapack.h:356
Definition FLA_Cntl_blas3.h:17
Definition FLA_Cntl_blas2.h:17
Definition FLA_Cntl_blas3.h:28
Definition FLA_Cntl_blas3.h:53
Definition FLA_Cntl_blas3.h:41
Definition FLA_Cntl_lapack.h:96
Definition FLA_Cntl_lapack.h:53
Definition FLA_Cntl_lapack.h:183
Definition FLA_Cntl_lapack.h:81
Definition FLA_Cntl_lapack.h:70
Definition FLA_Cntl_blas1.h:68
Definition FLA_Cntl_blas1.h:78
Definition FLA_Cntl_lapack.h:163
Definition FLA_Cntl_blas3.h:66
Definition FLA_Cntl_blas3.h:91
Definition FLA_Cntl_blas3.h:79
Definition FLA_Cntl_lapack.h:149
Definition FLA_Cntl_blas3.h:104
Definition FLA_Cntl_blas3.h:116
Definition FLA_Cntl_blas2.h:27
Definition FLA_Cntl_lapack.h:30
Definition FLA_Cntl_lapack.h:307

References FLASH_Task_s::cntl, FLA_Apply_CAQ2_UT_task(), FLA_Apply_pivots_macro_task(), FLA_Apply_Q2_UT_task(), FLA_Apply_Q_UT_task(), FLA_Apply_QUD_UT_task(), FLASH_Task_s::fla_arg, FLA_Axpy_task(), FLA_Axpyt_task(), FLA_CAQR2_UT_task(), FLA_Chol_task(), FLA_Copy_task(), FLA_Copyr_task(), FLA_Copyt_task(), FLA_Eig_gest_task(), FLA_Gemm_task(), FLA_Gemv_task(), FLA_Hemm_task(), FLA_Her2k_task(), FLA_Herk_task(), FLA_LQ_UT_macro_task(), FLA_LU_nopiv_task(), FLA_LU_piv_copy_task(), FLA_LU_piv_macro_task(), FLA_LU_piv_task(), FLA_Lyap_task(), FLA_Obj_create_buffer_task(), FLA_Obj_free_buffer_task(), FLA_QR2_UT_task(), FLA_QR_UT_copy_task(), FLA_QR_UT_macro_task(), FLA_QR_UT_task(), FLA_SA_FS_task(), FLA_SA_LU_task(), FLA_Scal_task(), FLA_Scalr_task(), FLA_Sylv_task(), FLA_Symm_task(), FLA_Syr2k_task(), FLA_Syrk_task(), FLA_Trinv_task(), FLA_Trmm_task(), FLA_Trsm_piv_task(), FLA_Trsm_task(), FLA_Trsv_task(), FLA_Ttmm_task(), FLA_UDdate_UT_task(), FLASH_Task_s::func, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, and FLASH_Task_s::output_arg.

Referenced by FLASH_Queue_exec_gpu(), FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().

◆ FLASH_Queue_finalize()

void FLASH_Queue_finalize ( void  )
268{
269 // Exit early if we're not already initialized.
270 if ( flash_queue_initialized == FALSE )
271 return;
272
273 // Clear the initialized flag.
274 flash_queue_initialized = FALSE;
275
276#ifdef FLA_ENABLE_GPU
277 FLASH_Queue_finalize_gpu();
278#endif
279
280 return;
281}
void FLASH_Queue_finalize_gpu(void)
Definition FLASH_Queue_gpu.c:36

References FLASH_Queue_finalize_gpu().

Referenced by FLA_Finalize().

◆ FLASH_Queue_get_block_size()

dim_t FLASH_Queue_get_block_size ( void  )
482{
483 return flash_queue_block_size;
484}

Referenced by FLASH_Queue_exec().

◆ FLASH_Queue_get_cache_line_size()

dim_t FLASH_Queue_get_cache_line_size ( void  )
530{
531 return flash_queue_cache_line_size;
532}

Referenced by FLASH_Queue_prefetch_block().

◆ FLASH_Queue_get_cache_size()

dim_t FLASH_Queue_get_cache_size ( void  )
506{
507 return flash_queue_cache_size;
508}

Referenced by FLASH_Queue_exec().

◆ FLASH_Queue_get_caching()

FLA_Bool FLASH_Queue_get_caching ( void  )

◆ FLASH_Queue_get_cores_per_cache()

int FLASH_Queue_get_cores_per_cache ( void  )
554{
555 return flash_queue_cores_per_cache;
556}

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), and FLASH_Queue_exec_simulation().

◆ FLASH_Queue_get_cores_per_queue()

int FLASH_Queue_get_cores_per_queue ( void  )
578{
579 return flash_queue_cores_per_queue;
580}

Referenced by FLASH_Queue_exec().

◆ FLASH_Queue_get_data_affinity()

FLASH_Data_aff FLASH_Queue_get_data_affinity ( void  )
410{
411 return flash_queue_data_affinity;
412}

Referenced by FLASH_Queue_exec(), FLASH_Queue_init_tasks(), and FLASH_Queue_verbose_output().

◆ FLASH_Queue_get_enabled()

FLA_Bool FLASH_Queue_get_enabled ( void  )

◆ FLASH_Queue_get_head_task()

FLASH_Task * FLASH_Queue_get_head_task ( void  )
609{
610 return _tq.head;
611}
FLASH_Queue _tq
Definition FLASH_Queue.c:27
FLASH_Task * head
Definition FLA_type_defs.h:179

References _tq, and FLASH_Queue_s::head.

Referenced by FLASH_Queue_init_tasks(), and FLASH_Queue_verbose_output().

◆ FLASH_Queue_get_num_tasks()

unsigned int FLASH_Queue_get_num_tasks ( void  )

◆ FLASH_Queue_get_num_threads()

unsigned int FLASH_Queue_get_num_threads ( void  )

◆ FLASH_Queue_get_parallel_time()

double FLASH_Queue_get_parallel_time ( void  )
436{
437 // Only return time if out of parallel region.
438 if ( flash_queue_stack == 0 )
439 return flash_queue_parallel_time;
440
441 return 0.0;
442}

◆ FLASH_Queue_get_sorting()

FLA_Bool FLASH_Queue_get_sorting ( void  )
338{
339 return flash_queue_sorting;
340}

Referenced by FLASH_Queue_wait_enqueue(), and FLASH_Task_update_binding().

◆ FLASH_Queue_get_tail_task()

FLASH_Task * FLASH_Queue_get_tail_task ( void  )
620{
621 return _tq.tail;
622}
FLASH_Task * tail
Definition FLA_type_defs.h:180

References _tq, and FLASH_Queue_s::tail.

Referenced by FLASH_Queue_init_tasks().

◆ FLASH_Queue_get_total_time()

double FLASH_Queue_get_total_time ( void  )
421{
422 // Only return time if out of parallel region.
423 if ( flash_queue_stack == 0 )
424 return flash_queue_total_time;
425
426 return 0.0;
427}

◆ FLASH_Queue_get_verbose_output()

FLASH_Verbose FLASH_Queue_get_verbose_output ( void  )
314{
315 return flash_queue_verbose;
316}

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_simulation(), and FLASH_Queue_verbose_output().

◆ FLASH_Queue_get_work_stealing()

FLA_Bool FLASH_Queue_get_work_stealing ( void  )
386{
387 return flash_queue_work_stealing;
388}

Referenced by FLASH_Queue_exec(), FLASH_Queue_exec_parallel_function(), and FLASH_Task_update_dependencies().

◆ FLASH_Queue_init()

void FLASH_Queue_init ( void  )
243{
244 // Exit early if we're already initialized.
245 if ( flash_queue_initialized == TRUE )
246 return;
247
248 // Reset all the initial values.
249 FLASH_Queue_reset();
250
251 // Set the initialized flag.
252 flash_queue_initialized = TRUE;
253
254#ifdef FLA_ENABLE_GPU
255 FLASH_Queue_init_gpu();
256#endif
257
258 return;
259}
void FLASH_Queue_reset(void)
Definition FLASH_Queue.c:583
void FLASH_Queue_init_gpu(void)
Definition FLASH_Queue_gpu.c:23

References FLASH_Queue_init_gpu(), and FLASH_Queue_reset().

Referenced by FLA_Init().

◆ FLASH_Queue_push()

void FLASH_Queue_push ( void * func,
void * cntl,
char * name,
FLA_Bool  enabled_gpu,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args,
  ... 
)
639{
640 int i;
641 va_list var_arg_list;
642 FLASH_Task* t;
643 FLA_Obj obj;
644
645 // Allocate a new FLASH_Task and populate its fields with appropriate values.
646 t = FLASH_Task_alloc( func, cntl, name, enabled_gpu,
647 n_int_args, n_fla_args,
648 n_input_args, n_output_args );
649
650 // Initialize variable argument environment. In case you're wondering, the
651 // second argument in this macro invocation of va_start() is supposed to be
652 // the parameter that immediately precedes the variable argument list
653 // (i.e., the ... above).
654 va_start( var_arg_list, n_output_args );
655
656 // Extract the integer arguments.
657 for ( i = 0; i < n_int_args; i++ )
658 t->int_arg[i] = va_arg( var_arg_list, int );
659
660 // Extract the FLA_Obj arguments.
661 for ( i = 0; i < n_fla_args; i++ )
662 t->fla_arg[i] = va_arg( var_arg_list, FLA_Obj );
663
664 // Extract the input FLA_Obj arguments.
665 for ( i = 0; i < n_input_args; i++ )
666 {
667 obj = va_arg( var_arg_list, FLA_Obj );
668 t->input_arg[i] = obj;
669
670 // Macroblock is used.
671 if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
672 {
673 dim_t jj, kk;
674 dim_t m = FLA_Obj_length( obj );
675 dim_t n = FLA_Obj_width( obj );
676 dim_t cs = FLA_Obj_col_stride( obj );
677 FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
678
679 // Dependence analysis for each input block in macroblock.
680 for ( jj = 0; jj < n; jj++ )
681 for ( kk = 0; kk < m; kk++ )
682 FLASH_Queue_push_input( *( buf + jj * cs + kk ), t );
683
684 // Set the number of blocks in the macroblock subtracted by one
685 // since we do not want to recount an operand for each n_input_arg.
686 t->n_macro_args += m * n - 1;
687 }
688 else // Regular block.
689 {
690 // Dependence analysis for input operand.
691 FLASH_Queue_push_input( obj, t );
692 }
693 }
694
695 // Extract the output FLA_Obj arguments.
696 for ( i = 0; i < n_output_args; i++ )
697 {
698 obj = va_arg( var_arg_list, FLA_Obj );
699 t->output_arg[i] = obj;
700
701 // Only assign data affinity to the first output block.
702 if ( i == 0 )
703 {
704 FLA_Obj buf = obj;
705
706 // Use the top left block of the macroblock.
707 if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
708 buf = *FLASH_OBJ_PTR_AT( obj );
709
710 if ( buf.base->write_task == NULL )
711 t->queue = flash_queue_n_write_blocks;
712 else
713 t->queue = buf.base->write_task->queue;
714 }
715
716 // Macroblock is used.
717 if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
718 {
719 dim_t jj, kk;
720 dim_t m = FLA_Obj_length( obj );
721 dim_t n = FLA_Obj_width( obj );
722 dim_t cs = FLA_Obj_col_stride( obj );
723 FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
724
725 // Dependence analysis for each output block in macroblock.
726 for ( jj = 0; jj < n; jj++ )
727 for ( kk = 0; kk < m; kk++ )
728 FLASH_Queue_push_output( *( buf + jj * cs + kk ), t );
729
730 // Set the number of blocks in the macroblock subtracted by one
731 // since we do not want to recount an operand for each n_output_arg.
732 t->n_macro_args += m * n - 1;
733 }
734 else // Regular block.
735 {
736 // Dependence analysis for output operand.
737 FLASH_Queue_push_output( obj, t );
738 }
739 }
740
741 // Finalize the variable argument environment.
742 va_end( var_arg_list );
743
744 // Add the task to the tail of the queue (and the head if queue is empty).
745 if ( _tq.n_tasks == 0 )
746 {
747 _tq.head = t;
748 _tq.tail = t;
749 }
750 else
751 {
752 t->prev_task = _tq.tail;
753 _tq.tail->next_task = t;
754 _tq.tail = t;
755
756 // Determine the index of the task in the task queue.
757 t->order = t->prev_task->order + 1;
758 }
759
760 // Increment the number of tasks.
761 _tq.n_tasks++;
762
763 return;
764}
void FLASH_Queue_push_output(FLA_Obj obj, FLASH_Task *t)
Definition FLASH_Queue.c:842
FLASH_Task * FLASH_Task_alloc(void *func, void *cntl, char *name, FLA_Bool enabled_gpu, int n_int_args, int n_fla_args, int n_input_args, int n_output_args)
Definition FLASH_Queue.c:956
void FLASH_Queue_push_input(FLA_Obj obj, FLASH_Task *t)
Definition FLASH_Queue.c:767
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Elemtype FLA_Obj_elemtype(FLA_Obj obj)
Definition FLA_Query.c:51
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
Definition FLA_type_defs.h:184
FLASH_Task * next_task
Definition FLA_type_defs.h:237
int order
Definition FLA_type_defs.h:189
FLASH_Task * prev_task
Definition FLA_type_defs.h:236

References _tq, FLA_Obj_view::base, FLASH_Task_s::fla_arg, FLA_Obj_col_stride(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_width(), FLASH_Queue_push_input(), FLASH_Queue_push_output(), FLASH_Task_alloc(), FLASH_Queue_s::head, i, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_macro_args, FLASH_Queue_s::n_tasks, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::queue, FLASH_Queue_s::tail, and FLA_Obj_struct::write_task.

◆ FLASH_Queue_push_input()

void FLASH_Queue_push_input ( FLA_Obj  obj,
FLASH_Task * t 
)
774{
775 FLASH_Task* task;
776 FLASH_Dep* d;
777
778 // Find dependence information.
779 if ( obj.base->write_task == NULL )
780 {
781 t->n_ready--;
782
783 // Add to number of blocks read if not written and not read before.
784 if ( obj.base->n_read_tasks == 0 )
785 {
786 // Identify each read block with an id for freeing.
787 obj.base->n_read_blocks = flash_queue_n_read_blocks;
788
789 flash_queue_n_read_blocks++;
790 }
791 }
792 else
793 { // Flow dependence.
794 task = obj.base->write_task;
795
796 d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
797
798 d->task = t;
799 d->next_dep = NULL;
800
801 if ( task->n_dep_args == 0 )
802 {
803 task->dep_arg_head = d;
804 task->dep_arg_tail = d;
805 }
806 else
807 {
808 task->dep_arg_tail->next_dep = d;
809 task->dep_arg_tail = d;
810 }
811
812 task->n_dep_args++;
813 }
814
815 // Add task to the read task in the object if not already there.
816 if ( obj.base->n_read_tasks == 0 ||
817 obj.base->read_task_tail->task != t )
818 { // Anti-dependence potentially.
819 d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
820
821 d->task = t;
822 d->next_dep = NULL;
823
824 if ( obj.base->n_read_tasks == 0 )
825 {
826 obj.base->read_task_head = d;
827 obj.base->read_task_tail = d;
828 }
829 else
830 {
831 obj.base->read_task_tail->next_dep = d;
832 obj.base->read_task_tail = d;
833 }
834
835 obj.base->n_read_tasks++;
836 }
837
838 return;
839}
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
Definition FLA_type_defs.h:245
FLASH_Task * task
Definition FLA_type_defs.h:247
FLASH_Dep * next_dep
Definition FLA_type_defs.h:250
int n_dep_args
Definition FLA_type_defs.h:231
FLASH_Dep * dep_arg_head
Definition FLA_type_defs.h:232
FLASH_Dep * dep_arg_tail
Definition FLA_type_defs.h:233
FLASH_Dep * read_task_tail
Definition FLA_type_defs.h:151
FLASH_Dep * read_task_head
Definition FLA_type_defs.h:150
int n_read_blocks
Definition FLA_type_defs.h:145
FLASH_Task * write_task
Definition FLA_type_defs.h:154
int n_read_tasks
Definition FLA_type_defs.h:149
FLA_Base_obj * base
Definition FLA_type_defs.h:168

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLA_malloc(), FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Task_s::n_ready, FLASH_Dep_s::next_dep, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLASH_Dep_s::task, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_push().

◆ FLASH_Queue_push_output()

void FLASH_Queue_push_output ( FLA_Obj  obj,
FLASH_Task t 
)
849{
850 int i;
851 FLASH_Task* task;
852 FLASH_Dep* d;
853 FLASH_Dep* next_dep;
854
855 // Assign tasks to threads with data affinity.
856 if ( obj.base->write_task == NULL )
857 {
858 t->n_ready--;
859
860 // Save index in which this output block is first encountered.
861 obj.base->n_write_blocks = flash_queue_n_write_blocks;
862
863 // Number of blocks written if not written before.
864 flash_queue_n_write_blocks++;
865
866 // Add to number of blocks read if not written or read before.
867 if ( obj.base->n_read_tasks == 0 )
868 {
869 // Identify each read block with an id for freeing.
870 obj.base->n_read_blocks = flash_queue_n_read_blocks;
871
872 flash_queue_n_read_blocks++;
873 }
874 }
875 else
876 { // Flow dependence potentially.
877 // The last task to overwrite this block is not itself.
878 if ( obj.base->write_task != t )
879 {
880 // Create dependency from task that last wrote the block.
881 task = obj.base->write_task;
882
883 d = (FLASH_Dep *) FLA_malloc( sizeof(FLASH_Dep) );
884
885 d->task = t;
886 d->next_dep = NULL;
887
888 if ( task->n_dep_args == 0 )
889 {
890 task->dep_arg_head = d;
891 task->dep_arg_tail = d;
892 }
893 else
894 {
895 task->dep_arg_tail->next_dep = d;
896 task->dep_arg_tail = d;
897 }
898
899 task->n_dep_args++;
900 }
901 else
902 {
903 // No need to notify task twice for output block already seen.
904 t->n_ready--;
905 }
906 }
907
908 // Clear read task for next set of reads and record the anti-dependence.
909 d = obj.base->read_task_head;
910
911 for ( i = 0; i < obj.base->n_read_tasks; i++ )
912 {
913 task = d->task;
914 next_dep = d->next_dep;
915
916 // If the last task to read is not the current task, add dependence.
917 if ( task != t )
918 {
919 d->task = t;
920 d->next_dep = NULL;
921
922 if ( task->n_dep_args == 0 )
923 {
924 task->dep_arg_head = d;
925 task->dep_arg_tail = d;
926 }
927 else
928 {
929 task->dep_arg_tail->next_dep = d;
930 task->dep_arg_tail = d;
931 }
932
933 task->n_dep_args++;
934
935 t->n_war_args++;
936 }
937 else
938 {
939 FLA_free( d );
940 }
941
942 d = next_dep;
943 }
944
945 obj.base->n_read_tasks = 0;
946 obj.base->read_task_head = NULL;
947 obj.base->read_task_tail = NULL;
948
949 // Record this task as the last to write to this block.
950 obj.base->write_task = t;
951
952 return;
953}
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
int n_write_blocks
Definition FLA_type_defs.h:146

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLA_free(), FLA_malloc(), i, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_read_blocks, FLA_Obj_struct::n_read_tasks, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLA_Obj_struct::n_write_blocks, FLASH_Dep_s::next_dep, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLASH_Dep_s::task, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_push().

◆ FLASH_Queue_reset()

void FLASH_Queue_reset ( void  )
589{
590 // Clear the other fields of the FLASH_Queue structure.
591 _tq.n_tasks = 0;
592 _tq.head = NULL;
593 _tq.tail = NULL;
594
595 // Reset the number of blocks.
596 flash_queue_n_read_blocks = 0;
597 flash_queue_n_write_blocks = 0;
598
599 return;
600}

References _tq, FLASH_Queue_s::head, FLASH_Queue_s::n_tasks, and FLASH_Queue_s::tail.

Referenced by FLASH_Queue_exec(), and FLASH_Queue_init().

◆ FLASH_Queue_set_block_size()

void FLASH_Queue_set_block_size ( dim_t  size)
467{
468 // Only adjust the block size if the new block is larger.
469 if ( flash_queue_block_size < size )
470 flash_queue_block_size = size;
471
472 return;
473}

Referenced by FLASH_Obj_create_hierarchy().

◆ FLASH_Queue_set_cache_line_size()

void FLASH_Queue_set_cache_line_size ( dim_t  size)
517{
518 flash_queue_cache_line_size = size;
519
520 return;
521}

◆ FLASH_Queue_set_cache_size()

void FLASH_Queue_set_cache_size ( dim_t  size)
493{
494 flash_queue_cache_size = size;
495
496 return;
497}

◆ FLASH_Queue_set_caching()

void FLASH_Queue_set_caching ( FLA_Bool  caching)
349{
350 flash_queue_caching = caching;
351
352 return;
353}

Referenced by FLASH_Queue_exec().

◆ FLASH_Queue_set_cores_per_cache()

void FLASH_Queue_set_cores_per_cache ( int  cores)
541{
542 flash_queue_cores_per_cache = cores;
543
544 return;
545}

◆ FLASH_Queue_set_cores_per_queue()

void FLASH_Queue_set_cores_per_queue ( int  cores)
565{
566 flash_queue_cores_per_queue = cores;
567
568 return;
569}

◆ FLASH_Queue_set_data_affinity()

void FLASH_Queue_set_data_affinity ( FLASH_Data_aff  data_affinity)
397{
398 flash_queue_data_affinity = data_affinity;
399
400 return;
401}

Referenced by FLASH_Queue_exec().

◆ FLASH_Queue_set_num_threads()

void FLASH_Queue_set_num_threads ( unsigned int  n_threads)
193{
195
196 // Verify that the number of threads is positive.
199
200 // Keep track of the number of threads internally.
201 flash_queue_n_threads = n_threads;
202
203#if FLA_MULTITHREADING_MODEL == FLA_OPENMP
204
205 // No additional action is necessary to set the number of OpenMP threads
206 // since setting the number of threads is handled at the parallel for loop
207 // with a num_threads() clause. This gives the user more flexibility since
208 // he can use the OMP_NUM_THREADS environment variable or the
209 // omp_set_num_threads() function to set the global number of OpenMP threads
210 // independently of the number of SuperMatrix threads.
211
212#elif FLA_MULTITHREADING_MODEL == FLA_PTHREADS
213
214 // No additional action is necessary to set the number of pthreads
215 // since setting the number of threads is handled entirely on our end.
216
217#endif
218
219 return;
220}
FLA_Error FLA_Check_num_threads(unsigned int n_threads)
Definition FLA_Check.c:884

References FLA_Check_num_threads().

◆ FLASH_Queue_set_parallel_time()

void FLASH_Queue_set_parallel_time ( double  dtime)
454{
455 flash_queue_parallel_time = dtime;
456
457 return;
458}

Referenced by FLASH_Queue_exec().

◆ FLASH_Queue_set_sorting()

void FLASH_Queue_set_sorting ( FLA_Bool  sorting)
325{
326 flash_queue_sorting = sorting;
327
328 return;
329}

◆ FLASH_Queue_set_verbose_output()

void FLASH_Queue_set_verbose_output ( FLASH_Verbose  verbose)
301{
302 flash_queue_verbose = verbose;
303
304 return;
305}

◆ FLASH_Queue_set_work_stealing()

void FLASH_Queue_set_work_stealing ( FLA_Bool  work_stealing)
373{
374 flash_queue_work_stealing = work_stealing;
375
376 return;
377}

Referenced by FLASH_Queue_exec().

◆ FLASH_Queue_stack_depth()

unsigned int FLASH_Queue_stack_depth ( void  )
112{
113 return flash_queue_stack;
114}

Referenced by FLASH_Eig_gest(), FLASH_LU_incpiv(), FLASH_QR_UT_inc(), FLASH_Queue_disable_gpu(), and FLASH_Queue_enable_gpu().

◆ FLASH_Queue_verbose_output()

void FLASH_Queue_verbose_output ( void  )
1788{
1789 int i, j, k;
1791 int n_tasks = FLASH_Queue_get_num_tasks();
1793 FLASH_Task* t;
1794 FLASH_Dep* d;
1795
1796 // Grab the head of the task queue.
1797 t = FLASH_Queue_get_head_task();
1798
1800 {
1801 // Iterate over linked list of tasks.
1802 for ( i = 0; i < n_tasks; i++ )
1803 {
1804 printf( "%d\t%s\t", t->order, t->name );
1805
1806 for ( j = 0; j < t->n_output_args; j++ )
1807 printf( "%lu[%lu,%lu] ", t->output_arg[j].base->id,
1808 t->output_arg[j].base->m_index,
1809 t->output_arg[j].base->n_index );
1810
1811 printf( ":= " );
1812
1813 for ( j = 0; j < t->n_output_args; j++ )
1814 printf( "%lu[%lu,%lu] ", t->output_arg[j].base->id,
1815 t->output_arg[j].base->m_index,
1816 t->output_arg[j].base->n_index );
1817
1818 for ( j = 0; j < t->n_input_args; j++ )
1819 printf( "%lu[%lu,%lu] ", t->input_arg[j].base->id,
1820 t->input_arg[j].base->m_index,
1821 t->input_arg[j].base->n_index );
1822
1823 printf( "\n" );
1824
1825 // Go to the next task.
1826 t = t->next_task;
1827 }
1828
1829 printf( "\n" );
1830 }
1831 else
1832 {
1833 printf( "digraph SuperMatrix {\n" );
1834
1836 {
1837 // Iterate over linked list of tasks.
1838 for ( i = 0; i < n_tasks; i++ )
1839 {
1840 printf( "%d [label=\"%s\"]; %d -> {", t->order, t->name, t->order);
1841
1842 d = t->dep_arg_head;
1843 for ( j = 0; j < t->n_dep_args; j++ )
1844 {
1845 printf( "%d;", d->task->order );
1846 d = d->next_dep;
1847 }
1848
1849 printf( "};\n" );
1850
1851 // Go to the next task.
1852 t = t->next_task;
1853 }
1854 }
1855 else
1856 {
1857 // Iterate over all the threads.
1858 for ( k = 0; k < n_threads; k++ )
1859 {
1860 printf( "subgraph cluster%d {\nlabel=\"%d\"\n", k, k );
1861
1862 // Iterate over linked list of tasks.
1863 for ( i = 0; i < n_tasks; i++ )
1864 {
1865 if ( t->queue == k )
1866 printf( "%d [label=\"%s\"];\n", t->order, t->name );
1867
1868 // Go to the next task.
1869 t = t->next_task;
1870 }
1871
1872 printf( "}\n" );
1873
1874 // Grab the head of the task queue.
1875 t = FLASH_Queue_get_head_task();
1876 }
1877
1878 // Iterate over linked list of tasks.
1879 for ( i = 0; i < n_tasks; i++ )
1880 {
1881 printf( "%d -> {", t->order );
1882
1883 d = t->dep_arg_head;
1884 for ( j = 0; j < t->n_dep_args; j++ )
1885 {
1886 printf( "%d;", d->task->order );
1887 d = d->next_dep;
1888 }
1889
1890 printf( "};\n" );
1891
1892 // Go to the next task.
1893 t = t->next_task;
1894 }
1895 }
1896
1897 printf( "}\n\n" );
1898 }
1899
1900 return;
1901}
FLASH_Verbose FLASH_Queue_get_verbose_output(void)
Definition FLASH_Queue.c:308
FLASH_Task * FLASH_Queue_get_head_task(void)
Definition FLASH_Queue.c:603
FLASH_Data_aff FLASH_Queue_get_data_affinity(void)
Definition FLASH_Queue.c:404
unsigned int FLASH_Queue_get_num_threads(void)
Definition FLASH_Queue.c:223
unsigned int FLASH_Queue_get_num_tasks(void)
Definition FLASH_Queue.c:284
int FLASH_Verbose
Definition FLA_type_defs.h:113

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Queue_get_data_affinity(), FLASH_Queue_get_head_task(), FLASH_Queue_get_num_tasks(), FLASH_Queue_get_num_threads(), FLASH_Queue_get_verbose_output(), i, FLA_Obj_struct::id, FLASH_Task_s::input_arg, FLA_Obj_struct::m_index, FLASH_Task_s::n_dep_args, FLA_Obj_struct::n_index, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLASH_Task_s::name, FLASH_Dep_s::next_dep, FLASH_Task_s::next_task, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::queue, and FLASH_Dep_s::task.

Referenced by FLASH_Queue_exec().

◆ FLASH_Task_alloc()

FLASH_Task * FLASH_Task_alloc ( void * func,
void * cntl,
char * name,
FLA_Bool  enabled_gpu,
int  n_int_args,
int  n_fla_args,
int  n_input_args,
int  n_output_args 
)
969{
970 FLASH_Task* t;
971
972 // Allocate space for the task structure t.
973 t = (FLASH_Task *) FLA_malloc( sizeof(FLASH_Task) );
974
975 // Allocate space for the task's integer arguments.
976 t->int_arg = (int *) FLA_malloc( n_int_args * sizeof(int) );
977
978 // Allocate space for the task's FLA_Obj arguments.
979 t->fla_arg = (FLA_Obj *) FLA_malloc( n_fla_args * sizeof(FLA_Obj) );
980
981 // Allocate space for the task's input FLA_Obj arguments.
982 t->input_arg = (FLA_Obj *) FLA_malloc( n_input_args * sizeof(FLA_Obj) );
983
984 // Allocate space for the task's output FLA_Obj arguments.
985 t->output_arg = (FLA_Obj *) FLA_malloc( n_output_args * sizeof(FLA_Obj) );
986
987 // Initialize other fields of the structure.
988 t->n_ready = 0;
989 t->order = 0;
990 t->queue = 0;
991 t->height = 0;
992 t->thread = 0;
993 t->cache = 0;
994 t->hit = FALSE;
995
996 t->func = func;
997 t->cntl = cntl;
998 t->name = name;
999 t->enabled_gpu = enabled_gpu;
1000 t->n_int_args = n_int_args;
1001 t->n_fla_args = n_fla_args;
1002 t->n_input_args = n_input_args;
1003 t->n_output_args = n_output_args;
1004
1005 t->n_macro_args = 0;
1006 t->n_war_args = 0;
1007 t->n_dep_args = 0;
1008 t->dep_arg_head = NULL;
1009 t->dep_arg_tail = NULL;
1010 t->prev_task = NULL;
1011 t->next_task = NULL;
1012 t->prev_wait = NULL;
1013 t->next_wait = NULL;
1014
1015 // Return a pointer to the initialized structure.
1016 return t;
1017}
int * int_arg
Definition FLA_type_defs.h:210

References FLASH_Task_s::cache, FLASH_Task_s::cntl, FLASH_Task_s::dep_arg_head, FLASH_Task_s::dep_arg_tail, FLASH_Task_s::enabled_gpu, FLASH_Task_s::fla_arg, FLA_malloc(), FLASH_Task_s::func, FLASH_Task_s::height, FLASH_Task_s::hit, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_fla_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_int_args, FLASH_Task_s::n_macro_args, FLASH_Task_s::n_output_args, FLASH_Task_s::n_ready, FLASH_Task_s::n_war_args, FLASH_Task_s::name, FLASH_Task_s::next_task, FLASH_Task_s::next_wait, FLASH_Task_s::order, FLASH_Task_s::output_arg, FLASH_Task_s::prev_task, FLASH_Task_s::prev_wait, FLASH_Task_s::queue, and FLASH_Task_s::thread.

Referenced by FLASH_Queue_push().

◆ FLASH_Task_free()

void FLASH_Task_free ( FLASH_Task * t)
1026{
1027 int i, j, k;
1028 FLA_Obj obj;
1029 FLASH_Dep* d;
1030 FLASH_Dep* next_dep;
1031
1032 // Clearing the last write task in each output block.
1033 for ( i = 0; i < t->n_output_args; i++ )
1034 {
1035 obj = t->output_arg[i];
1036
1037 // Macroblock is used.
1038 if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
1039 {
1040 dim_t jj, kk;
1041 dim_t m = FLA_Obj_length( obj );
1042 dim_t n = FLA_Obj_width( obj );
1043 dim_t cs = FLA_Obj_col_stride( obj );
1044 FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
1045
1046 // Clear each block in macroblock.
1047 for ( jj = 0; jj < n; jj++ )
1048 for ( kk = 0; kk < m; kk++ )
1049 ( buf + jj * cs + kk )->base->write_task = NULL;
1050 }
1051 else // Clear regular block.
1052 {
1053 obj.base->write_task = NULL;
1054 }
1055 }
1056
1057 // Cleaning the last read tasks in each input block.
1058 for ( i = 0; i < t->n_input_args; i++ )
1059 {
1060 obj = t->input_arg[i];
1061
1062 // Macroblock is used.
1063 if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX )
1064 {
1065 dim_t jj, kk;
1066 dim_t m = FLA_Obj_length( obj );
1067 dim_t n = FLA_Obj_width( obj );
1068 dim_t cs = FLA_Obj_col_stride( obj );
1069 FLA_Obj* buf = FLASH_OBJ_PTR_AT( obj );
1070
1071 // Clear each block in macroblock.
1072 for ( jj = 0; jj < n; jj++ )
1073 {
1074 for ( kk = 0; kk < m; kk++ )
1075 {
1076 obj = *( buf + jj * cs + kk );
1077
1078 k = obj.base->n_read_tasks;
1079 d = obj.base->read_task_head;
1080
1081 obj.base->n_read_tasks = 0;
1082 obj.base->read_task_head = NULL;
1083 obj.base->read_task_tail = NULL;
1084
1085 for ( j = 0; j < k; j++ )
1086 {
1087 next_dep = d->next_dep;
1088 FLA_free( d );
1089 d = next_dep;
1090 }
1091 }
1092 }
1093 }
1094 else // Regular block.
1095 {
1096 k = obj.base->n_read_tasks;
1097 d = obj.base->read_task_head;
1098
1099 obj.base->n_read_tasks = 0;
1100 obj.base->read_task_head = NULL;
1101 obj.base->read_task_tail = NULL;
1102
1103 for ( j = 0; j < k; j++ )
1104 {
1105 next_dep = d->next_dep;
1106 FLA_free( d );
1107 d = next_dep;
1108 }
1109 }
1110 }
1111
1112 // Free the dep_arg field of t.
1113 d = t->dep_arg_head;
1114
1115 for ( i = 0; i < t->n_dep_args; i++ )
1116 {
1117 next_dep = d->next_dep;
1118 FLA_free( d );
1119 d = next_dep;
1120 }
1121
1122 // Free the int_arg field of t.
1123 FLA_free( t->int_arg );
1124
1125 // Free the fla_arg field of t.
1126 FLA_free( t->fla_arg );
1127
1128 // Free the input_arg field of t.
1129 FLA_free( t->input_arg );
1130
1131 // Free the output_arg field of t.
1132 FLA_free( t->output_arg );
1133
1134 // Finally, free the struct itself.
1135 FLA_free( t );
1136
1137 return;
1138}

References FLA_Obj_view::base, FLASH_Task_s::dep_arg_head, FLASH_Task_s::fla_arg, FLA_free(), FLA_Obj_col_stride(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_width(), i, FLASH_Task_s::input_arg, FLASH_Task_s::int_arg, FLASH_Task_s::n_dep_args, FLASH_Task_s::n_input_args, FLASH_Task_s::n_output_args, FLA_Obj_struct::n_read_tasks, FLASH_Dep_s::next_dep, FLASH_Task_s::output_arg, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, and FLA_Obj_struct::write_task.

Referenced by FLASH_Queue_exec(), and FLASH_Queue_exec_simulation().

Variable Documentation

◆ _tq