libflame revision_anchor
Functions
FLA_Apply_G_rf_asm_var3b.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var3b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var3b()

FLA_Error FLA_Apply_G_rf_asc_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
614{
616
617 return FLA_SUCCESS;
618}
int i
Definition bl1_axmyv2.c:145

References i.

Referenced by FLA_Apply_G_rf_asm_var3b().

◆ FLA_Apply_G_rf_asd_var3b()

FLA_Error FLA_Apply_G_rf_asd_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
138{
139 double one = bl1_d1();
140 double zero = bl1_d0();
141 double gamma23_k1;
142 double sigma23_k1;
143 double gamma34_k1;
144 double sigma34_k1;
145 double gamma12_k2;
146 double sigma12_k2;
147 double gamma23_k2;
148 double sigma23_k2;
149 double* a1;
150 double* a2;
151 double* a3;
152 double* a4;
157 int i, j, g, k;
158 int nG, nG_app;
159 int n_iter;
160 int n_left;
161 int k_minus_1;
162 int n_fuse;
163 int k_fuse;
166 int has_ident;
167 int m_app;
168
169
170 k_minus_1 = k_G - 1;
171 nG = n_A - 1;
172 n_fuse = 2;
173 k_fuse = 2;
174
175 // Use the simple variant for nG < 2*(k - 1) or k == 1.
176 if ( nG < 2*k_minus_1 || k_G == 1 )
177 {
179 m_A,
180 n_A,
181 buff_G, rs_G, cs_G,
182 buff_A, rs_A, cs_A );
183 return FLA_SUCCESS;
184 }
185
186
187 // Start-up phase.
188
189 for ( j = -1; j < k_minus_1; j += n_fuse )
190 {
191 nG_app = j + 2;
192 n_iter = nG_app / k_fuse;
193 //n_iter = nG_app % k_fuse;
194 n_left = 1;
195
196 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
197 {
198 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
199 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
200 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
201 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
202 a1 = buff_A + (g - 1)*cs_A;
203 a2 = buff_A + (g )*cs_A;
204 a3 = buff_A + (g + 1)*cs_A;
205 a4 = buff_A + (g + 2)*cs_A;
206
207 gamma23_k1 = g23_k1->real;
208 sigma23_k1 = g23_k1->imag;
209 gamma34_k1 = g34_k1->real;
210 sigma34_k1 = g34_k1->imag;
211 gamma12_k2 = g12_k2->real;
212 sigma12_k2 = g12_k2->imag;
213 gamma23_k2 = g23_k2->real;
214 sigma23_k2 = g23_k2->imag;
215
222
223 m_app = min( i_k + 3 + j - iTL, m_A );
224 m_app = max( m_app, 0 );
225
226 if ( has_ident )
227 {
228 // Apply to pairs of columns as needed.
229
230 if ( !is_ident23_k1 )
232 &gamma23_k1,
233 &sigma23_k1,
234 a2, 1,
235 a3, 1 );
236
237 if ( !is_ident34_k1 )
239 &gamma34_k1,
240 &sigma34_k1,
241 a3, 1,
242 a4, 1 );
243
244 if ( !is_ident12_k2 )
246 &gamma12_k2,
247 &sigma12_k2,
248 a1, 1,
249 a2, 1 );
250
251 if ( !is_ident23_k2 )
253 &gamma23_k2,
254 &sigma23_k2,
255 a2, 1,
256 a3, 1 );
257 }
258 else
259 {
260 // Apply to all four columns.
261
263 &gamma23_k1,
264 &sigma23_k1,
265 &gamma34_k1,
266 &sigma34_k1,
267 &gamma12_k2,
268 &sigma12_k2,
269 &gamma23_k2,
270 &sigma23_k2,
271 a1, 1,
272 a2, 1,
273 a3, 1,
274 a4, 1 );
275 }
276 }
277
278 if ( n_left == 1 )
279 {
280 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
281 a3 = buff_A + (g + 1)*cs_A;
282 a4 = buff_A + (g + 2)*cs_A;
283
284 gamma34_k1 = g34_k1->real;
285 sigma34_k1 = g34_k1->imag;
286
288
289 m_app = min( i_k + 3 + j - iTL, m_A );
290 m_app = max( m_app, 0 );
291
292 if ( !is_ident34_k1 )
294 &gamma34_k1,
295 &sigma34_k1,
296 a3, 1,
297 a4, 1 );
298 }
299 }
300
301 // Pipeline stage
302
303 for ( ; j < nG - 1; j += n_fuse )
304 {
305 nG_app = k_G;
306 n_iter = nG_app / k_fuse;
307 n_left = nG_app % k_fuse;
308
309 for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
310 {
311 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
312 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
313 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
314 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
315 a1 = buff_A + (g - 1)*cs_A;
316 a2 = buff_A + (g )*cs_A;
317 a3 = buff_A + (g + 1)*cs_A;
318 a4 = buff_A + (g + 2)*cs_A;
319
320 gamma23_k1 = g23_k1->real;
321 sigma23_k1 = g23_k1->imag;
322 gamma34_k1 = g34_k1->real;
323 sigma34_k1 = g34_k1->imag;
324 gamma12_k2 = g12_k2->real;
325 sigma12_k2 = g12_k2->imag;
326 gamma23_k2 = g23_k2->real;
327 sigma23_k2 = g23_k2->imag;
328
335
336 m_app = min( i_k + 3 + j - iTL, m_A );
337 m_app = max( m_app, 0 );
338
339 if ( has_ident )
340 {
341 // Apply to pairs of columns as needed.
342
343 if ( !is_ident23_k1 )
345 &gamma23_k1,
346 &sigma23_k1,
347 a2, 1,
348 a3, 1 );
349
350 if ( !is_ident34_k1 )
352 &gamma34_k1,
353 &sigma34_k1,
354 a3, 1,
355 a4, 1 );
356
357 if ( !is_ident12_k2 )
359 &gamma12_k2,
360 &sigma12_k2,
361 a1, 1,
362 a2, 1 );
363
364 if ( !is_ident23_k2 )
366 &gamma23_k2,
367 &sigma23_k2,
368 a2, 1,
369 a3, 1 );
370 }
371 else
372 {
373 // Apply to all four columns.
374
376 &gamma23_k1,
377 &sigma23_k1,
378 &gamma34_k1,
379 &sigma34_k1,
380 &gamma12_k2,
381 &sigma12_k2,
382 &gamma23_k2,
383 &sigma23_k2,
384 a1, 1,
385 a2, 1,
386 a3, 1,
387 a4, 1 );
388 }
389 }
390
391 if ( n_left == 1 )
392 {
393 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
394 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
395 a2 = buff_A + (g )*cs_A;
396 a3 = buff_A + (g + 1)*cs_A;
397 a4 = buff_A + (g + 2)*cs_A;
398
399 gamma23_k1 = g23_k1->real;
400 sigma23_k1 = g23_k1->imag;
401 gamma34_k1 = g34_k1->real;
402 sigma34_k1 = g34_k1->imag;
403
406
407 m_app = min( i_k + 3 + j - iTL, m_A );
408 m_app = max( m_app, 0 );
409
411 {
413 &gamma23_k1,
414 &sigma23_k1,
415 a2, 1,
416 a3, 1 );
417 }
418 else if ( is_ident23_k1 && !is_ident34_k1 )
419 {
421 &gamma34_k1,
422 &sigma34_k1,
423 a3, 1,
424 a4, 1 );
425 }
426 else
427 {
429 &gamma23_k1,
430 &sigma23_k1,
431 &gamma34_k1,
432 &sigma34_k1,
433 a2, 1,
434 a3, 1,
435 a4, 1 );
436 }
437 }
438 }
439
440 // Shutdown stage
441
442 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
443 {
444 g = nG - 1;
445 k = j;
446
447 //n_left = 1;
448 //if ( n_left == 1 )
449 {
450 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
451 a2 = buff_A + (g )*cs_A;
452 a3 = buff_A + (g + 1)*cs_A;
453
454 gamma23_k1 = g23_k1->real;
455 sigma23_k1 = g23_k1->imag;
456
458
459 m_app = m_A;
460
461 if ( !is_ident23_k1 )
463 &gamma23_k1,
464 &sigma23_k1,
465 a2, 1,
466 a3, 1 );
467 ++k;
468 --g;
469 }
470
471 nG_app = k_minus_1 - j;
472 n_iter = nG_app / k_fuse;
473 n_left = nG_app % k_fuse;
474
475 for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
476 {
477 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
478 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
479 g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
480 g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
481 a1 = buff_A + (g - 1)*cs_A;
482 a2 = buff_A + (g )*cs_A;
483 a3 = buff_A + (g + 1)*cs_A;
484 a4 = buff_A + (g + 2)*cs_A;
485
486 gamma23_k1 = g23_k1->real;
487 sigma23_k1 = g23_k1->imag;
488 gamma34_k1 = g34_k1->real;
489 sigma34_k1 = g34_k1->imag;
490 gamma12_k2 = g12_k2->real;
491 sigma12_k2 = g12_k2->imag;
492 gamma23_k2 = g23_k2->real;
493 sigma23_k2 = g23_k2->imag;
494
501
502 m_app = m_A;
503
504 if ( has_ident )
505 {
506 // Apply to pairs of columns as needed.
507
508 if ( !is_ident23_k1 )
510 &gamma23_k1,
511 &sigma23_k1,
512 a2, 1,
513 a3, 1 );
514
515 if ( !is_ident34_k1 )
517 &gamma34_k1,
518 &sigma34_k1,
519 a3, 1,
520 a4, 1 );
521
522 if ( !is_ident12_k2 )
524 &gamma12_k2,
525 &sigma12_k2,
526 a1, 1,
527 a2, 1 );
528
529 if ( !is_ident23_k2 )
531 &gamma23_k2,
532 &sigma23_k2,
533 a2, 1,
534 a3, 1 );
535 }
536 else
537 {
538 // Apply to all four columns.
539
541 &gamma23_k1,
542 &sigma23_k1,
543 &gamma34_k1,
544 &sigma34_k1,
545 &gamma12_k2,
546 &sigma12_k2,
547 &gamma23_k2,
548 &sigma23_k2,
549 a1, 1,
550 a2, 1,
551 a3, 1,
552 a4, 1 );
553 }
554 }
555
556 if ( n_left == 1 )
557 {
558 g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
559 g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
560 a2 = buff_A + (g )*cs_A;
561 a3 = buff_A + (g + 1)*cs_A;
562 a4 = buff_A + (g + 2)*cs_A;
563
564 gamma23_k1 = g23_k1->real;
565 sigma23_k1 = g23_k1->imag;
566 gamma34_k1 = g34_k1->real;
567 sigma34_k1 = g34_k1->imag;
568
571
572 m_app = m_A;
573
575 {
577 &gamma23_k1,
578 &sigma23_k1,
579 a2, 1,
580 a3, 1 );
581 }
582 else if ( is_ident23_k1 && !is_ident34_k1 )
583 {
585 &gamma34_k1,
586 &sigma34_k1,
587 a3, 1,
588 a4, 1 );
589 }
590 else
591 {
593 &gamma23_k1,
594 &sigma23_k1,
595 &gamma34_k1,
596 &sigma34_k1,
597 a2, 1,
598 a3, 1,
599 a4, 1 );
600 }
601 }
602 }
603
604 return FLA_SUCCESS;
605}
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:164
int n_left
Definition bl1_axmyv2.c:149
double bl1_d0(void)
Definition bl1_constants.c:118
double bl1_d1(void)
Definition bl1_constants.c:54
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bld_var3b().

◆ FLA_Apply_G_rf_asm_var3b()

FLA_Error FLA_Apply_G_rf_asm_var3b ( FLA_Obj  G,
FLA_Obj  A 
)
29{
30 FLA_Datatype datatype;
31 int k_G, m_A, n_A;
32 int rs_G, cs_G;
33 int rs_A, cs_A;
34
35 datatype = FLA_Obj_datatype( A );
36
37 k_G = FLA_Obj_width( G );
38 m_A = FLA_Obj_length( A );
39 n_A = FLA_Obj_width( A );
40
43
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
52 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53
55 m_A,
56 n_A,
57 0,
58 0,
60 buff_A, rs_A, cs_A );
61
62 break;
63 }
64
65 case FLA_DOUBLE:
66 {
68 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69
71 m_A,
72 n_A,
73 0,
74 0,
76 buff_A, rs_A, cs_A );
77
78 break;
79 }
80
81 case FLA_COMPLEX:
82 {
85
87 m_A,
88 n_A,
89 0,
90 0,
92 buff_A, rs_A, cs_A );
93
94 break;
95 }
96
98 {
101
103 m_A,
104 n_A,
105 0,
106 0,
107 buff_G, rs_G, cs_G,
108 buff_A, rs_A, cs_A );
109
110 break;
111 }
112 }
113
114 return FLA_SUCCESS;
115}
FLA_Error FLA_Apply_G_rf_asd_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:131
FLA_Error FLA_Apply_G_rf_asz_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:620
FLA_Error FLA_Apply_G_rf_asc_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:607
FLA_Error FLA_Apply_G_rf_ass_var3b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var3b.c:118
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:133

References FLA_Apply_G_rf_asc_var3b(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_ass_var3b(), FLA_Apply_G_rf_asz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_ass_var3b()

FLA_Error FLA_Apply_G_rf_ass_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
125{
127
128 return FLA_SUCCESS;
129}

References i.

Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bls_var3b().

◆ FLA_Apply_G_rf_asz_var3b()

FLA_Error FLA_Apply_G_rf_asz_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
627{
629
630 return FLA_SUCCESS;
631}

References i.

Referenced by FLA_Apply_G_rf_asm_var3b().