libflame revision_anchor
Functions
FLA_Bidiag_UT_u_fus_var2.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_ofu_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_ofu_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 return FLA_Bidiag_UT_u_step_ofu_var2( A, TU, TV );
16}
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_fus_var2.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Bidiag_UT_u_step_ofu_var2(), and i.

◆ FLA_Bidiag_UT_u_step_ofc_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
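520{
521 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
522 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
523 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
524
525 scomplex beta;
526 int i;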
527
528 // b_alg = FLA_Obj_length( T );
529 int b_alg = m_TS;
530
531 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
532 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
533 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
534 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
535 scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
536 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
537 int inc_v = 1;
538 int inc_y = 1;
539 int inc_z = 1;
540
541 for ( i = 0; i < b_alg; ++i )
542 {
543 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
544 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
545 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
546 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
547 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
548 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
549 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
550
551 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
552 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
553
554 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
555 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
556
557 scomplex* v21 = buff_v + (i+1)*inc_v;
558
559 scomplex* y21 = buff_y + (i+1)*inc_y;
560
561 scomplex* z21 = buff_z + (i+1)*inc_z;
562
563 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
564 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
565
566 scomplex* v21_t = v21 + (0 )*inc_v;
567 scomplex* v21_b = v21 + (1 )*inc_v;
568
569 int m_ahead = m_A - i - 1;
570 int n_ahead = n_A - i - 1;
571 int m_behind = i;
572 int n_behind = i;
573
574 /*------------------------------------------------------------*/
575
576 // FLA_Househ2_UT( FLA_LEFT,
577 // alpha11,
578 // a21, tau11 );
579 FLA_Househ2_UT_l_opc( m_ahead,
580 alpha11,
581 a21, rs_A,
582 tau11 );
583
584 if ( n_ahead > 0 )
585 {
586 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
587 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
589 n_ahead,
590 a12t, cs_A,
591 y21, inc_y );
594 m_ahead,
595 n_ahead,
596 buff_1,
597 A22, rs_A, cs_A,
598 a21, rs_A,
599 buff_1,
600 y21, inc_y );
601
602 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
604 n_ahead,
605 tau11,
606 y21, inc_y );
607
608 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
610 n_ahead,
611 buff_m1,
612 y21, inc_y,
613 a12t, cs_A );
614
615 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
617 a12t_l,
618 a12t_r, cs_A,
619 sigma11 );
620
621 // FLA_Set( FLA_ONE, v21_t );
622 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
623 *v21_t = *buff_1;
625 n_ahead - 1,
626 a12t_r, cs_A,
627 v21_b, inc_y );
628
629 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
630 // FLA_Scal( FLA_MINUS_ONE, beta );
632 n_ahead,
633 y21, inc_y,
634 v21, inc_v,
635 &beta );
636 bl1_cneg1( &beta );
637
638 // FLA_Copy( a21, z21 );
639 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
640 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
642 m_ahead,
643 a21, rs_A,
644 z21, inc_z );
647 m_ahead,
648 n_ahead,
649 buff_1,
650 A22, rs_A, cs_A,
651 v21, inc_v,
652 &beta,
653 z21, inc_z );
655 m_ahead,
656 sigma11,
657 z21, inc_z );
658
659 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
660 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
661 FLA_Fused_Gerc2_opc_var1( m_ahead,
662 n_ahead,
663 buff_m1,
664 a21, rs_A,
665 y21, inc_y,
666 z21, inc_z,
667 v21, inc_v,
668 A22, rs_A, cs_A );
669
670 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
673 m_behind,
674 n_ahead,
675 buff_1,
676 A02, rs_A, cs_A,
677 v21, inc_v,
678 buff_0,
679 s01, rs_S );
680 }
681
682 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
683 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
685 n_behind,
686 a10t, cs_A,
687 t01, rs_T );
690 m_ahead,
691 n_behind,
692 buff_1,
693 A20, rs_A, cs_A,
694 a21, rs_A,
695 buff_1,
696 t01, rs_T );
697
698 /*------------------------------------------------------------*/
699
700 }
701
702 // FLA_Obj_free( &v );
703 // FLA_Obj_free( &y );
704 // FLA_Obj_free( &z );
705 FLA_free( buff_v );
706 FLA_free( buff_y );
707 FLA_free( buff_z );
708
709 return FLA_SUCCESS;
710}
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:241
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:677
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofd_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)
320{
321 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
322 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
323 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
324
325 double beta;
326 int i;
327
328 // b_alg = FLA_Obj_length( T );
329 int b_alg = m_TS;
330
331 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
332 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
333 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
334 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
335 double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
336 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
337 int inc_v = 1;
338 int inc_y = 1;
339 int inc_z = 1;
340
341 for ( i = 0; i < b_alg; ++i )
342 {
343 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
344 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
345 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
346 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350
351 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353
354 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
355 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
356
357 double* v21 = buff_v + (i+1)*inc_v;
358
359 double* y21 = buff_y + (i+1)*inc_y;
360
361 double* z21 = buff_z + (i+1)*inc_z;
362
363 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
364 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
365
366 double* v21_t = v21 + (0 )*inc_v;
367 double* v21_b = v21 + (1 )*inc_v;
368
369 int m_ahead = m_A - i - 1;
370 int n_ahead = n_A - i - 1;
371 int m_behind = i;
372 int n_behind = i;
373
374 /*------------------------------------------------------------*/
375
376 // FLA_Househ2_UT( FLA_LEFT,
377 // alpha11,
378 // a21, tau11 );
379 FLA_Househ2_UT_l_opd( m_ahead,
380 alpha11,
381 a21, rs_A,
382 tau11 );
383
384 if ( n_ahead > 0 )
385 {
386 // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
387 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
389 n_ahead,
390 a12t, cs_A,
391 y21, inc_y );
394 m_ahead,
395 n_ahead,
396 buff_1,
397 A22, rs_A, cs_A,
398 a21, rs_A,
399 buff_1,
400 y21, inc_y );
401
402 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
404 n_ahead,
405 tau11,
406 y21, inc_y );
407
408 // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
410 n_ahead,
411 buff_m1,
412 y21, inc_y,
413 a12t, cs_A );
414
415 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
417 a12t_l,
418 a12t_r, cs_A,
419 sigma11 );
420
421 // FLA_Set( FLA_ONE, v21_t );
422 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
423 *v21_t = *buff_1;
425 n_ahead - 1,
426 a12t_r, cs_A,
427 v21_b, inc_y );
428
429 // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
430 // FLA_Scal( FLA_MINUS_ONE, beta );
432 n_ahead,
433 v21, inc_v,
434 y21, inc_y,
435 &beta );
436 bl1_dneg1( &beta );
437
438 // FLA_Copy( a21, z21 );
439 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
440 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
442 m_ahead,
443 a21, rs_A,
444 z21, inc_z );
447 m_ahead,
448 n_ahead,
449 buff_1,
450 A22, rs_A, cs_A,
451 v21, inc_v,
452 &beta,
453 z21, inc_z );
455 m_ahead,
456 sigma11,
457 z21, inc_z );
458
459 // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
460 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
461 FLA_Fused_Gerc2_opd_var1( m_ahead,
462 n_ahead,
463 buff_m1,
464 a21, rs_A,
465 y21, inc_y,
466 z21, inc_z,
467 v21, inc_v,
468 A22, rs_A, cs_A );
469
470 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
473 m_behind,
474 n_ahead,
475 buff_1,
476 A02, rs_A, cs_A,
477 v21, inc_v,
478 buff_0,
479 s01, rs_S );
480 }
481
482 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
483 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
485 n_behind,
486 a10t, cs_A,
487 t01, rs_T );
490 m_ahead,
491 n_behind,
492 buff_1,
493 A20, rs_A, cs_A,
494 a21, rs_A,
495 buff_1,
496 t01, rs_T );
497
498 /*------------------------------------------------------------*/
499
500 }
501
502 // FLA_Obj_free( &v );
503 // FLA_Obj_free( &y );
504 // FLA_Obj_free( &z );
505 FLA_free( buff_v );
506 FLA_free( buff_y );
507 FLA_free( buff_z );
508
509 return FLA_SUCCESS;
510}
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:664
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofs_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124
125 float beta;
126 int i;
127
128 // b_alg = FLA_Obj_length( T );
129 int b_alg = m_TS;
130
131 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135 float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137 int inc_v = 1;
138 int inc_y = 1;
139 int inc_z = 1;
140
141 for ( i = 0; i < b_alg; ++i )
142 {
143 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150
151 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153
154 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156
157 float* v21 = buff_v + (i+1)*inc_v;
158
159 float* y21 = buff_y + (i+1)*inc_y;
160
161 float* z21 = buff_z + (i+1)*inc_z;
162
163 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165
166 float* v21_t = v21 + (0 )*inc_v;
167 float* v21_b = v21 + (1 )*inc_v;
168
169 int m_ahead = m_A - i - 1;
170 int n_ahead = n_A - i - 1;
171 int m_behind = i;
172 int n_behind = i;
173
174 /*------------------------------------------------------------*/
175
176 // FLA_Househ2_UT( FLA_LEFT,
177 // alpha11,
178 // a21, tau11 );
179 FLA_Househ2_UT_l_ops( m_ahead,
180 alpha11,
181 a21, rs_A,
182 tau11 );
183
184 if ( n_ahead > 0 )
185 {
186 // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
187 // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
189 n_ahead,
190 a12t, cs_A,
191 y21, inc_y );
194 m_ahead,
195 n_ahead,
196 buff_1,
197 A22, rs_A, cs_A,
198 a21, rs_A,
199 buff_1,
200 y21, inc_y );
201
202 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
204 n_ahead,
205 tau11,
206 y21, inc_y );
207
208 // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
210 n_ahead,
211 buff_m1,
212 y21, inc_y,
213 a12t, cs_A );
214
215 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
217 a12t_l,
218 a12t_r, cs_A,
219 sigma11 );
220
221 // FLA_Set( FLA_ONE, v21_t );
222 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223 *v21_t = *buff_1;
225 n_ahead - 1,
226 a12t_r, cs_A,
227 v21_b, inc_y );
228
229 // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
230 // FLA_Scal( FLA_MINUS_ONE, beta );
232 n_ahead,
233 v21, inc_v,
234 y21, inc_y,
235 &beta );
236 bl1_sneg1( &beta );
237
238 // FLA_Copy( a21, z21 );
239 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
242 m_ahead,
243 a21, rs_A,
244 z21, inc_z );
247 m_ahead,
248 n_ahead,
249 buff_1,
250 A22, rs_A, cs_A,
251 v21, inc_v,
252 &beta,
253 z21, inc_z );
255 m_ahead,
256 sigma11,
257 z21, inc_z );
258
259 // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
260 // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
261 FLA_Fused_Gerc2_ops_var1( m_ahead,
262 n_ahead,
263 buff_m1,
264 a21, rs_A,
265 y21, inc_y,
266 z21, inc_z,
267 v21, inc_v,
268 A22, rs_A, cs_A );
269
270 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
273 m_behind,
274 n_ahead,
275 buff_1,
276 A02, rs_A, cs_A,
277 v21, inc_v,
278 buff_0,
279 s01, rs_S );
280 }
281
282 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
283 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
285 n_behind,
286 a10t, cs_A,
287 t01, rs_T );
290 m_ahead,
291 n_behind,
292 buff_1,
293 A20, rs_A, cs_A,
294 a21, rs_A,
295 buff_1,
296 t01, rs_T );
297
298 /*------------------------------------------------------------*/
299
300 }
301
302 // FLA_Obj_free( &v );
303 // FLA_Obj_free( &y );
304 // FLA_Obj_free( &z );
305 FLA_free( buff_v );
306 FLA_free( buff_y );
307 FLA_free( buff_z );
308
309 return FLA_SUCCESS;
310}
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:651
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
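26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
30 m_TS = FLA_Obj_length( T );
31
32 rs_A = FLA_Obj_row_stride( A );
33 cs_A = FLA_Obj_col_stride( A );
34
35 rs_T = FLA_Obj_row_stride( T );
36 cs_T = FLA_Obj_col_stride( T );
37
38 rs_S = FLA_Obj_row_stride( S );
39 cs_S = FLA_Obj_col_stride( S );
40
41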
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
50 FLA_Bidiag_UT_u_step_ofs_var2( m_A,
51 n_A,
52 m_TS,
53 buff_A, rs_A, cs_A,
54 buff_T, rs_T, cs_T,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
66 FLA_Bidiag_UT_u_step_ofd_var2( m_A,
67 n_A,
68 m_TS,
69 buff_A, rs_A, cs_A,
70 buff_T, rs_T, cs_T,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
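77 {
78 scomplex* buff_A = FLA_COMPLEX_PTR( A );
79 scomplex* buff_T = FLA_COMPLEX_PTR( T );
80 scomplex* buff_S = FLA_COMPLEX_PTR( S );
81
82 FLA_Bidiag_UT_u_step_ofc_var2( m_A,
83 n_A,
84 m_TS,
85 buff_A, rs_A, cs_A,
86 buff_T, rs_T, cs_T,
87 buff_S, rs_S, cs_S );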
88
89 break;
90 }
91
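92 case FLA_DOUBLE_COMPLEX:
93 {
94 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96 dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97
98 FLA_Bidiag_UT_u_step_ofz_var2( m_A,
99 n_A,
100 m_TS,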
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:114
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:514
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:714
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_fus_var2.c:314
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blf_var2(), and FLA_Bidiag_UT_u_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofz_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)
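720{
721 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
722 dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
723 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
724
725 dcomplex beta;
726 int i;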
727
728 // b_alg = FLA_Obj_length( T );
729 int b_alg = m_TS;
730
731 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
732 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
733 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
734 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
735 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
736 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
737 int inc_v = 1;
738 int inc_y = 1;
739 int inc_z = 1;
740
741 for ( i = 0; i < b_alg; ++i )
742 {
743 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
744 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
745 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
746 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
747 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
748 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
749 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
750
751 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
752 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
753
754 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
755 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
756
757 dcomplex* v21 = buff_v + (i+1)*inc_v;
758
759 dcomplex* y21 = buff_y + (i+1)*inc_y;
760
761 dcomplex* z21 = buff_z + (i+1)*inc_z;
762
763 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
764 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
765
766 dcomplex* v21_t = v21 + (0 )*inc_v;
767 dcomplex* v21_b = v21 + (1 )*inc_v;
768
769 int m_ahead = m_A - i - 1;
770 int n_ahead = n_A - i - 1;
771 int m_behind = i;
772 int n_behind = i;
773
774 /*------------------------------------------------------------*/
775
776 // FLA_Househ2_UT( FLA_LEFT,
777 // alpha11,
778 // a21, tau11 );
779 FLA_Househ2_UT_l_opz( m_ahead,
780 alpha11,
781 a21, rs_A,
782 tau11 );
783
784 if ( n_ahead > 0 )
785 {
786 // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
787 // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
789 n_ahead,
790 a12t, cs_A,
791 y21, inc_y );
794 m_ahead,
795 n_ahead,
796 buff_1,
797 A22, rs_A, cs_A,
798 a21, rs_A,
799 buff_1,
800 y21, inc_y );
801
802 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
804 n_ahead,
805 tau11,
806 y21, inc_y );
807
808 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
810 n_ahead,
811 buff_m1,
812 y21, inc_y,
813 a12t, cs_A );
814
815 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
817 a12t_l,
818 a12t_r, cs_A,
819 sigma11 );
820
821 // FLA_Set( FLA_ONE, v21_t );
822 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
823 *v21_t = *buff_1;
825 n_ahead - 1,
826 a12t_r, cs_A,
827 v21_b, inc_y );
828
829 // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
830 // FLA_Scal( FLA_MINUS_ONE, beta );
832 n_ahead,
833 y21, inc_y,
834 v21, inc_v,
835 &beta );
836 bl1_zneg1( &beta );
837
838 // FLA_Copy( a21, z21 );
839 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
840 // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
842 m_ahead,
843 a21, rs_A,
844 z21, inc_z );
847 m_ahead,
848 n_ahead,
849 buff_1,
850 A22, rs_A, cs_A,
851 v21, inc_v,
852 &beta,
853 z21, inc_z );
855 m_ahead,
856 sigma11,
857 z21, inc_z );
858
859 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
860 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
861 FLA_Fused_Gerc2_opz_var1( m_ahead,
862 n_ahead,
863 buff_m1,
864 a21, rs_A,
865 y21, inc_y,
866 z21, inc_z,
867 v21, inc_v,
868 A22, rs_A, cs_A );
869
870 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
873 m_behind,
874 n_ahead,
875 buff_1,
876 A02, rs_A, cs_A,
877 v21, inc_v,
878 buff_0,
879 s01, rs_S );
880 }
881
882 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
883 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
885 n_behind,
886 a10t, cs_A,
887 t01, rs_T );
890 m_ahead,
891 n_behind,
892 buff_1,
893 A20, rs_A, cs_A,
894 a21, rs_A,
895 buff_1,
896 t01, rs_T );
897
898 /*------------------------------------------------------------*/
899
900 }
901
902 // FLA_Obj_free( &v );
903 // FLA_Obj_free( &y );
904 // FLA_Obj_free( &z );
905 FLA_free( buff_v );
906 FLA_free( buff_y );
907 FLA_free( buff_z );
908
909 return FLA_SUCCESS;
910}
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:693
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().