libflame revision_anchor
Functions
FLA_Tridiag_UT_l_fus_var3.c File Reference

(r)

Functions

FLA_Error FLA_Tridiag_UT_l_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Tridiag_UT_l_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Z;
17
19
21
22 FLA_Obj_free( &Z );
23
24 return r_val;
25}
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition FLA_Tridiag_UT_l_fus_var3.c:27
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition FLA_Obj.c:286
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
int FLA_Error
Definition FLA_type_defs.h:47
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Obj_create_conf_to(), FLA_Obj_free(), FLA_Tridiag_UT_l_step_ofu_var3(), and i.

◆ FLA_Tridiag_UT_l_step_ofc_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
469{
474
479 int i;
480
481 // b_alg = FLA_Obj_length( T );
482 int b_alg = m_T;
483
484 // FLA_Set( FLA_ZERO, Z );
485 bl1_csetm( m_A,
486 b_alg,
487 buff_0,
488 buff_Z, rs_Z, cs_Z );
489
490 for ( i = 0; i < b_alg; ++i )
491 {
492 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
493 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
494 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
495 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
496 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
497
498 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
499 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
500 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
501
502 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504
505 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
506
507 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
508 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
509
510 scomplex* ABL = a10t;
511 scomplex* ZBL = z10t;
512
514
515 int m_ahead = m_A - i - 1;
516 int m_behind = i;
517 int n_behind = i;
518
519 /*------------------------------------------------------------*/
520
521 if ( m_behind > 0 )
522 {
523 // FLA_Copy( a10t_r, last_elem );
524 // FLA_Set( FLA_ONE, a10t_r );
525 last_elem = *a10t_r;
526 *a10t_r = *buff_1;
527 }
528
529 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
530 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
533 m_ahead + 1,
534 n_behind,
535 buff_m1,
536 ABL, rs_A, cs_A,
537 z10t, cs_Z,
538 buff_1,
539 a2, rs_A );
542 m_ahead + 1,
543 n_behind,
544 buff_m1,
545 ZBL, rs_Z, cs_Z,
546 a10t, cs_A,
547 buff_1,
548 a2, rs_A );
549
550 if ( m_behind > 0 )
551 {
552 // FLA_Copy( last_elem, a10t_r );
553 *a10t_r = last_elem;
554 }
555
556 if ( m_ahead > 0 )
557 {
558 // FLA_Househ2_UT( FLA_LEFT,
559 // a21_t,
560 // a21_b, tau11 );
562 a21_t,
563 a21_b, rs_A,
564 tau11 );
565
566 // FLA_Set( FLA_ONE, inv_tau11 );
567 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
568 // FLA_Copy( inv_tau11, minus_inv_tau11 );
569 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
572
573 // FLA_Copy( a21_t, first_elem );
574 // FLA_Set( FLA_ONE, a21_t );
575 first_elem = *a21_t;
576 *a21_t = *buff_1;
577
578 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
581 m_ahead,
582 buff_1,
583 A22, rs_A, cs_A,
584 a21, rs_A,
585 buff_0,
586 z21, rs_Z );
587
588 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
589 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
590 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
591 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
592 // FLA_Copy( d01, t01 );
594 n_behind,
595 buff_m1,
596 A20, rs_A, cs_A,
597 Z20, rs_Z, cs_Z,
598 t01, rs_T,
599 a21, rs_A,
600 z21, rs_Z );
601
602 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
603 // FLA_Inv_scal( FLA_TWO, beta );
605 m_ahead,
606 a21, rs_A,
607 z21, rs_Z,
608 &beta );
610
611 // FLA_Scal( minus_inv_tau11, beta );
612 // FLA_Axpy( beta, a21, z21 );
613 // FLA_Scal( inv_tau11, z21 );
616 m_ahead,
617 &beta,
618 a21, rs_A,
619 z21, rs_Z );
621 m_ahead,
622 &inv_tau11,
623 z21, rs_Z );
624
625 // FLA_Copy( first_elem, a21_t );
626 *a21_t = first_elem;
627 }
628
629 /*------------------------------------------------------------*/
630
631 }
632
633 return FLA_SUCCESS;
634}
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:411
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_hemv.c:35
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_LOWER_TRIANGULAR
Definition blis_type_defs.h:62
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofd_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
296{
297 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
298 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
299 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
301
302 double first_elem, last_elem;
303 double beta;
304 double inv_tau11;
305 double minus_inv_tau11;
306 int i;
307
308 // b_alg = FLA_Obj_length( T );
309 int b_alg = m_T;
310
311 // FLA_Set( FLA_ZERO, Z );
312 bl1_dsetm( m_A,
313 b_alg,
314 buff_0,
315 buff_Z, rs_Z, cs_Z );
316
317 for ( i = 0; i < b_alg; ++i )
318 {
319 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
320 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
321 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
322 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
323 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
324
325 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
326 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
327 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
328
329 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
330 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
331
332 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
333
334 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
335 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
336
337 double* ABL = a10t;
338 double* ZBL = z10t;
339
340 double* a2 = alpha11;
341
342 int m_ahead = m_A - i - 1;
343 int m_behind = i;
344 int n_behind = i;
345
346 /*------------------------------------------------------------*/
347
348 if ( m_behind > 0 )
349 {
350 // FLA_Copy( a10t_r, last_elem );
351 // FLA_Set( FLA_ONE, a10t_r );
352 last_elem = *a10t_r;
353 *a10t_r = *buff_1;
354 }
355
356 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
357 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
360 m_ahead + 1,
361 n_behind,
362 buff_m1,
363 ABL, rs_A, cs_A,
364 z10t, cs_Z,
365 buff_1,
366 a2, rs_A );
369 m_ahead + 1,
370 n_behind,
371 buff_m1,
372 ZBL, rs_Z, cs_Z,
373 a10t, cs_A,
374 buff_1,
375 a2, rs_A );
376
377 if ( m_behind > 0 )
378 {
379 // FLA_Copy( last_elem, a10t_r );
380 *a10t_r = last_elem;
381 }
382
383 if ( m_ahead > 0 )
384 {
385 // FLA_Househ2_UT( FLA_LEFT,
386 // a21_t,
387 // a21_b, tau11 );
389 a21_t,
390 a21_b, rs_A,
391 tau11 );
392
393 // FLA_Set( FLA_ONE, inv_tau11 );
394 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
395 // FLA_Copy( inv_tau11, minus_inv_tau11 );
396 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
399
400 // FLA_Copy( a21_t, first_elem );
401 // FLA_Set( FLA_ONE, a21_t );
402 first_elem = *a21_t;
403 *a21_t = *buff_1;
404
405 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
407 m_ahead,
408 buff_1,
409 A22, rs_A, cs_A,
410 a21, rs_A,
411 buff_0,
412 z21, rs_Z );
413
414 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
415 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
416 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
417 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
418 // FLA_Copy( d01, t01 );
420 n_behind,
421 buff_m1,
422 A20, rs_A, cs_A,
423 Z20, rs_Z, cs_Z,
424 t01, rs_T,
425 a21, rs_A,
426 z21, rs_Z );
427
428 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
429 // FLA_Inv_scal( FLA_TWO, beta );
431 m_ahead,
432 a21, rs_A,
433 z21, rs_Z,
434 &beta );
436
437 // FLA_Scal( minus_inv_tau11, beta );
438 // FLA_Axpy( beta, a21, z21 );
439 // FLA_Scal( inv_tau11, z21 );
442 m_ahead,
443 &beta,
444 a21, rs_A,
445 z21, rs_Z );
447 m_ahead,
448 &inv_tau11,
449 z21, rs_Z );
450
451 // FLA_Copy( first_elem, a21_t );
452 *a21_t = first_elem;
453 }
454
455 /*------------------------------------------------------------*/
456
457 }
458
459 return FLA_SUCCESS;
460}
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:222
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_symv.c:56
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsetm(), bl1_dsymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofs_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
123{
124 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
125 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
126 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
128
129 float first_elem, last_elem;
130 float beta;
131 float inv_tau11;
132 float minus_inv_tau11;
133 int i;
134
135 // b_alg = FLA_Obj_length( T );
136 int b_alg = m_T;
137
138 // FLA_Set( FLA_ZERO, Z );
139 bl1_ssetm( m_A,
140 b_alg,
141 buff_0,
142 buff_Z, rs_Z, cs_Z );
143
144 for ( i = 0; i < b_alg; ++i )
145 {
146 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
147 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
148 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
149 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
150 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
151
152 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
153 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
154 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
155
156 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
157 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
158
159 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
160
161 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
162 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
163
164 float* ABL = a10t;
165 float* ZBL = z10t;
166
167 float* a2 = alpha11;
168
169 int m_ahead = m_A - i - 1;
170 int m_behind = i;
171 int n_behind = i;
172
173 /*------------------------------------------------------------*/
174
175 if ( m_behind > 0 )
176 {
177 // FLA_Copy( a10t_r, last_elem );
178 // FLA_Set( FLA_ONE, a10t_r );
179 last_elem = *a10t_r;
180 *a10t_r = *buff_1;
181 }
182
183 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
184 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
187 m_ahead + 1,
188 n_behind,
189 buff_m1,
190 ABL, rs_A, cs_A,
191 z10t, cs_Z,
192 buff_1,
193 a2, rs_A );
196 m_ahead + 1,
197 n_behind,
198 buff_m1,
199 ZBL, rs_Z, cs_Z,
200 a10t, cs_A,
201 buff_1,
202 a2, rs_A );
203
204 if ( m_behind > 0 )
205 {
206 // FLA_Copy( last_elem, a10t_r );
207 *a10t_r = last_elem;
208 }
209
210 if ( m_ahead > 0 )
211 {
212 // FLA_Househ2_UT( FLA_LEFT,
213 // a21_t,
214 // a21_b, tau11 );
216 a21_t,
217 a21_b, rs_A,
218 tau11 );
219
220 // FLA_Set( FLA_ONE, inv_tau11 );
221 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
222 // FLA_Copy( inv_tau11, minus_inv_tau11 );
223 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
226
227 // FLA_Copy( a21_t, first_elem );
228 // FLA_Set( FLA_ONE, a21_t );
229 first_elem = *a21_t;
230 *a21_t = *buff_1;
231
232 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
234 m_ahead,
235 buff_1,
236 A22, rs_A, cs_A,
237 a21, rs_A,
238 buff_0,
239 z21, rs_Z );
240
241 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
242 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
243 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
244 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
245 // FLA_Copy( d01, t01 );
247 n_behind,
248 buff_m1,
249 A20, rs_A, cs_A,
250 Z20, rs_Z, cs_Z,
251 t01, rs_T,
252 a21, rs_A,
253 z21, rs_Z );
254
255 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
256 // FLA_Inv_scal( FLA_TWO, beta );
258 m_ahead,
259 a21, rs_A,
260 z21, rs_Z,
261 &beta );
263
264 // FLA_Scal( minus_inv_tau11, beta );
265 // FLA_Axpy( beta, a21, z21 );
266 // FLA_Scal( inv_tau11, z21 );
269 m_ahead,
270 &beta,
271 a21, rs_A,
272 z21, rs_Z );
274 m_ahead,
275 &inv_tau11,
276 z21, rs_Z );
277
278 // FLA_Copy( first_elem, a21_t );
279 *a21_t = first_elem;
280 }
281
282 /*------------------------------------------------------------*/
283
284 }
285
286 return FLA_SUCCESS;
287}
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:136
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_symv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssetm(), bl1_ssymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
28{
29 FLA_Datatype datatype;
30 int m_A, m_T;
31 int rs_A, cs_A;
32 int rs_Z, cs_Z;
33 int rs_T, cs_T;
34
35 datatype = FLA_Obj_datatype( A );
36
37 m_A = FLA_Obj_length( A );
38 m_T = FLA_Obj_length( T );
39
42
45
48
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
54 float* buff_A = FLA_FLOAT_PTR( A );
55 float* buff_Z = FLA_FLOAT_PTR( Z );
56 float* buff_T = FLA_FLOAT_PTR( T );
57
59 m_T,
62 buff_T, rs_T, cs_T );
63
64 break;
65 }
66
67 case FLA_DOUBLE:
68 {
69 double* buff_A = FLA_DOUBLE_PTR( A );
70 double* buff_Z = FLA_DOUBLE_PTR( Z );
71 double* buff_T = FLA_DOUBLE_PTR( T );
72
74 m_T,
77 buff_T, rs_T, cs_T );
78
79 break;
80 }
81
82 case FLA_COMPLEX:
83 {
87
89 m_T,
92 buff_T, rs_T, cs_T );
93
94 break;
95 }
96
98 {
102
104 m_T,
105 buff_A, rs_A, cs_A,
106 buff_Z, rs_Z, cs_Z,
107 buff_T, rs_T, cs_T );
108
109 break;
110 }
111 }
112
113 return FLA_SUCCESS;
114}
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:464
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:291
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:638
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:118
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ofz_var3(), and i.

Referenced by FLA_Tridiag_UT_l_blf_var3(), and FLA_Tridiag_UT_l_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofz_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
643{
648
653 int i;
654
655 // b_alg = FLA_Obj_length( T );
656 int b_alg = m_T;
657
658 // FLA_Set( FLA_ZERO, Z );
659 bl1_zsetm( m_A,
660 b_alg,
661 buff_0,
662 buff_Z, rs_Z, cs_Z );
663
664 for ( i = 0; i < b_alg; ++i )
665 {
666 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
667 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
668 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
669 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
670 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
671
672 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
673 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
674 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
675
676 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
677 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
678
679 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
680
681 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
682 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
683
684 dcomplex* ABL = a10t;
685 dcomplex* ZBL = z10t;
686
688
689 int m_ahead = m_A - i - 1;
690 int m_behind = i;
691 int n_behind = i;
692
693 /*------------------------------------------------------------*/
694
695 if ( m_behind > 0 )
696 {
697 // FLA_Copy( a10t_r, last_elem );
698 // FLA_Set( FLA_ONE, a10t_r );
699 last_elem = *a10t_r;
700 *a10t_r = *buff_1;
701 }
702
703 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
704 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
707 m_ahead + 1,
708 n_behind,
709 buff_m1,
710 ABL, rs_A, cs_A,
711 z10t, cs_Z,
712 buff_1,
713 a2, rs_A );
716 m_ahead + 1,
717 n_behind,
718 buff_m1,
719 ZBL, rs_Z, cs_Z,
720 a10t, cs_A,
721 buff_1,
722 a2, rs_A );
723
724 if ( m_behind > 0 )
725 {
726 // FLA_Copy( last_elem, a10t_r );
727 *a10t_r = last_elem;
728 }
729
730 if ( m_ahead > 0 )
731 {
732 // FLA_Househ2_UT( FLA_LEFT,
733 // a21_t,
734 // a21_b, tau11 );
736 a21_t,
737 a21_b, rs_A,
738 tau11 );
739
740 // FLA_Set( FLA_ONE, inv_tau11 );
741 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
742 // FLA_Copy( inv_tau11, minus_inv_tau11 );
743 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
746
747 // FLA_Copy( a21_t, first_elem );
748 // FLA_Set( FLA_ONE, a21_t );
749 first_elem = *a21_t;
750 *a21_t = *buff_1;
751
752 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
755 m_ahead,
756 buff_1,
757 A22, rs_A, cs_A,
758 a21, rs_A,
759 buff_0,
760 z21, rs_Z );
761
762 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
763 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
764 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
765 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
766 // FLA_Copy( d01, t01 );
768 n_behind,
769 buff_m1,
770 A20, rs_A, cs_A,
771 Z20, rs_Z, cs_Z,
772 t01, rs_T,
773 a21, rs_A,
774 z21, rs_Z );
775
776 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
777 // FLA_Inv_scal( FLA_TWO, beta );
779 m_ahead,
780 a21, rs_A,
781 z21, rs_Z,
782 &beta );
784
785 // FLA_Scal( minus_inv_tau11, beta );
786 // FLA_Axpy( beta, a21, z21 );
787 // FLA_Scal( inv_tau11, z21 );
790 m_ahead,
791 &beta,
792 a21, rs_A,
793 z21, rs_Z );
795 m_ahead,
796 &inv_tau11,
797 z21, rs_Z );
798
799 // FLA_Copy( first_elem, a21_t );
800 *a21_t = first_elem;
801 }
802
803 /*------------------------------------------------------------*/
804
805 }
806
807 return FLA_SUCCESS;
808}
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:485
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_hemv.c:134
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().