libflame revision_anchor
Functions
FLA_Hess_UT_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_opt_var2()

FLA_Error FLA_Hess_UT_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_opt_var2( A, T );
16}
FLA_Error FLA_Hess_UT_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_opt_var2.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Hess_UT_step_opt_var2(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_step_opc_var2()

FLA_Error FLA_Hess_UT_step_opc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
539{
544
550 int i;
551
552 // b_alg = FLA_Obj_length( T );
553 int b_alg = m_T;
554
555 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
556 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
557 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
558 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
559 int inc_y = 1;
560 int inc_z = 1;
561
562 for ( i = 0; i < b_alg; ++i )
563 {
564 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
565 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
566 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
567 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
568 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
569
570 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
571 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
572
573 scomplex* y0 = buff_y + (0 )*inc_y;
574 scomplex* y2 = buff_y + (i+1)*inc_y;
575
576 scomplex* z2 = buff_z + (i+1)*inc_z;
577
578 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
579 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
580
581 int m_ahead = m_A - i - 1;
582 int n_ahead = m_A - i - 1;
583 int m_behind = i;
584 int n_behind = i;
585
586 /*------------------------------------------------------------*/
587
588 if ( m_ahead > 0 )
589 {
590 // FLA_Househ2_UT( FLA_LEFT,
591 // a21_t,
592 // a21_b, tau11 );
594 a21_t,
595 a21_b, rs_A,
596 tau11 );
597
598 // FLA_Set( FLA_ONE, inv_tau11 );
599 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
600 // FLA_Copy( inv_tau11, minus_inv_tau11 );
601 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
604
605 // FLA_Copy( a21_t, first_elem );
606 // FLA_Set( FLA_ONE, a21_t );
607 first_elem = *a21_t;
608 *a21_t = *buff_1;
609
610 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
613 m_ahead,
614 n_ahead,
615 buff_1,
616 A22, rs_A, cs_A,
617 a21, rs_A,
618 buff_0,
619 y2, inc_y );
620
621 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
624 m_ahead,
625 n_ahead,
626 buff_1,
627 A22, rs_A, cs_A,
628 a21, rs_A,
629 buff_0,
630 z2, inc_z );
631
632 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
633 // FLA_Inv_scal( FLA_TWO, beta );
634 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
636 m_ahead,
637 a21, rs_A,
638 z2, inc_z,
639 &beta );
642
643 // FLA_Scal( minus_inv_tau11, conj_beta );
644 // FLA_Axpy( conj_beta, a21, y2 );
645 // FLA_Scal( inv_tau11, y2 );
648 m_ahead,
649 &conj_beta,
650 a21, rs_A,
651 y2, inc_y );
653 m_ahead,
654 &inv_tau11,
655 y2, inc_y );
656
657 // FLA_Scal( minus_inv_tau11, beta );
658 // FLA_Axpy( beta, a21, z2 );
659 // FLA_Scal( inv_tau11, z2 );
662 m_ahead,
663 &beta,
664 a21, rs_A,
665 z2, inc_z );
667 m_ahead,
668 &inv_tau11,
669 z2, inc_z );
670
671 // FLA_Dot( a12t, a21, dot_product );
672 // FLA_Scal( minus_inv_tau11, dot_product );
673 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
675 m_ahead,
676 a12t, cs_A,
677 a21, rs_A,
678 &dot_product );
681 m_ahead,
683 a21, rs_A,
684 a12t, cs_A );
685
686 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
687 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
690 m_behind,
691 n_ahead,
692 buff_1,
693 A02, rs_A, cs_A,
694 a21, rs_A,
695 buff_0,
696 y0, inc_y );
699 m_behind,
700 n_ahead,
702 y0, inc_y,
703 a21, rs_A,
704 A02, rs_A, cs_A );
705
706 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
707 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
710 m_ahead,
711 n_ahead,
712 buff_m1,
713 a21, rs_A,
714 y2, inc_y,
715 A22, rs_A, cs_A );
718 m_ahead,
719 n_ahead,
720 buff_m1,
721 z2, inc_z,
722 a21, rs_A,
723 A22, rs_A, cs_A );
724
725 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
728 m_ahead,
729 n_behind,
730 buff_1,
731 A20, rs_A, cs_A,
732 a21, rs_A,
733 buff_0,
734 t01, rs_T );
735
736 // FLA_Copy( first_elem, a21_t );
737 *a21_t = first_elem;
738 }
739
740 /*------------------------------------------------------------*/
741
742 }
743
744 // FLA_Obj_free( &y );
745 // FLA_Obj_free( &z );
746 FLA_free( buff_y );
747 FLA_free( buff_z );
748
749 return FLA_SUCCESS;
750}
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
scomplex
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opd_var2()

FLA_Error FLA_Hess_UT_step_opd_var2 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
320{
321 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
322 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
323 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
325
326 double first_elem;
327 double dot_product;
328 double beta, conj_beta;
329 double inv_tau11;
330 double minus_inv_tau11;
331 int i;
332
333 // b_alg = FLA_Obj_length( T );
334 int b_alg = m_T;
335
336 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
337 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
338 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
339 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
340 int inc_y = 1;
341 int inc_z = 1;
342
343 for ( i = 0; i < b_alg; ++i )
344 {
345 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
346 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350
351 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353
354 double* y0 = buff_y + (0 )*inc_y;
355 double* y2 = buff_y + (i+1)*inc_y;
356
357 double* z2 = buff_z + (i+1)*inc_z;
358
359 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
360 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
361
362 int m_ahead = m_A - i - 1;
363 int n_ahead = m_A - i - 1;
364 int m_behind = i;
365 int n_behind = i;
366
367 /*------------------------------------------------------------*/
368
369 if ( m_ahead > 0 )
370 {
371 // FLA_Househ2_UT( FLA_LEFT,
372 // a21_t,
373 // a21_b, tau11 );
375 a21_t,
376 a21_b, rs_A,
377 tau11 );
378
379 // FLA_Set( FLA_ONE, inv_tau11 );
380 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
381 // FLA_Copy( inv_tau11, minus_inv_tau11 );
382 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
385
386 // FLA_Copy( a21_t, first_elem );
387 // FLA_Set( FLA_ONE, a21_t );
388 first_elem = *a21_t;
389 *a21_t = *buff_1;
390
391 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
394 m_ahead,
395 n_ahead,
396 buff_1,
397 A22, rs_A, cs_A,
398 a21, rs_A,
399 buff_0,
400 y2, inc_y );
401
402 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
405 m_ahead,
406 n_ahead,
407 buff_1,
408 A22, rs_A, cs_A,
409 a21, rs_A,
410 buff_0,
411 z2, inc_z );
412
413 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
414 // FLA_Inv_scal( FLA_TWO, beta );
415 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
417 m_ahead,
418 a21, rs_A,
419 z2, inc_z,
420 &beta );
423
424 // FLA_Scal( minus_inv_tau11, conj_beta );
425 // FLA_Axpy( conj_beta, a21, y2 );
426 // FLA_Scal( inv_tau11, y2 );
429 m_ahead,
430 &conj_beta,
431 a21, rs_A,
432 y2, inc_y );
434 m_ahead,
435 &inv_tau11,
436 y2, inc_y );
437
438 // FLA_Scal( minus_inv_tau11, beta );
439 // FLA_Axpy( beta, a21, z2 );
440 // FLA_Scal( inv_tau11, z2 );
443 m_ahead,
444 &beta,
445 a21, rs_A,
446 z2, inc_z );
448 m_ahead,
449 &inv_tau11,
450 z2, inc_z );
451
452 // FLA_Dot( a12t, a21, dot_product );
453 // FLA_Scal( minus_inv_tau11, dot_product );
454 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
456 m_ahead,
457 a12t, cs_A,
458 a21, rs_A,
459 &dot_product );
462 m_ahead,
464 a21, rs_A,
465 a12t, cs_A );
466
467 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
468 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
471 m_behind,
472 n_ahead,
473 buff_1,
474 A02, rs_A, cs_A,
475 a21, rs_A,
476 buff_0,
477 y0, inc_y );
480 m_behind,
481 n_ahead,
483 y0, inc_y,
484 a21, rs_A,
485 A02, rs_A, cs_A );
486
487 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
488 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
491 m_ahead,
492 n_ahead,
493 buff_m1,
494 a21, rs_A,
495 y2, inc_y,
496 A22, rs_A, cs_A );
499 m_ahead,
500 n_ahead,
501 buff_m1,
502 z2, inc_z,
503 a21, rs_A,
504 A22, rs_A, cs_A );
505
506 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
509 m_ahead,
510 n_behind,
511 buff_1,
512 A20, rs_A, cs_A,
513 a21, rs_A,
514 buff_0,
515 t01, rs_T );
516
517 // FLA_Copy( first_elem, a21_t );
518 *a21_t = first_elem;
519 }
520
521 /*------------------------------------------------------------*/
522
523 }
524
525 // FLA_Obj_free( &y );
526 // FLA_Obj_free( &z );
527 FLA_free( buff_y );
528 FLA_free( buff_z );
529
530 return FLA_SUCCESS;
531}
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_ops_var2()

FLA_Error FLA_Hess_UT_step_ops_var2 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
112 int i;
113
114 // b_alg = FLA_Obj_length( T );
115 int b_alg = m_T;
116
117 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
119 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121 int inc_y = 1;
122 int inc_z = 1;
123
124 for ( i = 0; i < b_alg; ++i )
125 {
126 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131
132 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134
135 float* y0 = buff_y + (0 )*inc_y;
136 float* y2 = buff_y + (i+1)*inc_y;
137
138 float* z2 = buff_z + (i+1)*inc_z;
139
140 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142
143 int m_ahead = m_A - i - 1;
144 int n_ahead = m_A - i - 1;
145 int m_behind = i;
146 int n_behind = i;
147
148 /*------------------------------------------------------------*/
149
150 if ( m_ahead > 0 )
151 {
152 // FLA_Househ2_UT( FLA_LEFT,
153 // a21_t,
154 // a21_b, tau11 );
156 a21_t,
157 a21_b, rs_A,
158 tau11 );
159
160 // FLA_Set( FLA_ONE, inv_tau11 );
161 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162 // FLA_Copy( inv_tau11, minus_inv_tau11 );
163 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
166
167 // FLA_Copy( a21_t, first_elem );
168 // FLA_Set( FLA_ONE, a21_t );
169 first_elem = *a21_t;
170 *a21_t = *buff_1;
171
172 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
175 m_ahead,
176 n_ahead,
177 buff_1,
178 A22, rs_A, cs_A,
179 a21, rs_A,
180 buff_0,
181 y2, inc_y );
182
183 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
186 m_ahead,
187 n_ahead,
188 buff_1,
189 A22, rs_A, cs_A,
190 a21, rs_A,
191 buff_0,
192 z2, inc_z );
193
194 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
195 // FLA_Inv_scal( FLA_TWO, beta );
196 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
198 m_ahead,
199 a21, rs_A,
200 z2, inc_z,
201 &beta );
204
205 // FLA_Scal( minus_inv_tau11, conj_beta );
206 // FLA_Axpy( conj_beta, a21, y2 );
207 // FLA_Scal( inv_tau11, y2 );
210 m_ahead,
211 &conj_beta,
212 a21, rs_A,
213 y2, inc_y );
215 m_ahead,
216 &inv_tau11,
217 y2, inc_y );
218
219 // FLA_Scal( minus_inv_tau11, beta );
220 // FLA_Axpy( beta, a21, z2 );
221 // FLA_Scal( inv_tau11, z2 );
224 m_ahead,
225 &beta,
226 a21, rs_A,
227 z2, inc_z );
229 m_ahead,
230 &inv_tau11,
231 z2, inc_z );
232
233 // FLA_Dot( a12t, a21, dot_product );
234 // FLA_Scal( minus_inv_tau11, dot_product );
235 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
237 m_ahead,
238 a12t, cs_A,
239 a21, rs_A,
240 &dot_product );
243 m_ahead,
245 a21, rs_A,
246 a12t, cs_A );
247
248 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
249 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
252 m_behind,
253 n_ahead,
254 buff_1,
255 A02, rs_A, cs_A,
256 a21, rs_A,
257 buff_0,
258 y0, inc_y );
261 m_behind,
262 n_ahead,
264 y0, inc_y,
265 a21, rs_A,
266 A02, rs_A, cs_A );
267
268 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
269 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
272 m_ahead,
273 n_ahead,
274 buff_m1,
275 a21, rs_A,
276 y2, inc_y,
277 A22, rs_A, cs_A );
280 m_ahead,
281 n_ahead,
282 buff_m1,
283 z2, inc_z,
284 a21, rs_A,
285 A22, rs_A, cs_A );
286
287 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
290 m_ahead,
291 n_behind,
292 buff_1,
293 A20, rs_A, cs_A,
294 a21, rs_A,
295 buff_0,
296 t01, rs_T );
297
298 // FLA_Copy( first_elem, a21_t );
299 *a21_t = first_elem;
300 }
301
302 /*------------------------------------------------------------*/
303
304 }
305
306 // FLA_Obj_free( &y );
307 // FLA_Obj_free( &z );
308 FLA_free( buff_y );
309 FLA_free( buff_z );
310
311 return FLA_SUCCESS;
312}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opt_var2()

FLA_Error FLA_Hess_UT_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_opc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:535
FLA_Error FLA_Hess_UT_step_opd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:316
FLA_Error FLA_Hess_UT_step_opz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:754
FLA_Error FLA_Hess_UT_step_ops_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:97
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
dcomplex
Definition blis_type_defs.h:138

References FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var2(), and FLA_Hess_UT_opt_var2().

◆ FLA_Hess_UT_step_opz_var2()

FLA_Error FLA_Hess_UT_step_opz_var2 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
758{
763
769 int i;
770
771 // b_alg = FLA_Obj_length( T );
772 int b_alg = m_T;
773
774 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
775 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
776 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
777 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
778 int inc_y = 1;
779 int inc_z = 1;
780
781 for ( i = 0; i < b_alg; ++i )
782 {
783 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
784 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
785 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
786 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
787 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
788
789 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
790 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
791
792 dcomplex* y0 = buff_y + (0 )*inc_y;
793 dcomplex* y2 = buff_y + (i+1)*inc_y;
794
795 dcomplex* z2 = buff_z + (i+1)*inc_z;
796
797 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
798 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
799
800 int m_ahead = m_A - i - 1;
801 int n_ahead = m_A - i - 1;
802 int m_behind = i;
803 int n_behind = i;
804
805 /*------------------------------------------------------------*/
806
807 if ( m_ahead > 0 )
808 {
809 // FLA_Househ2_UT( FLA_LEFT,
810 // a21_t,
811 // a21_b, tau11 );
813 a21_t,
814 a21_b, rs_A,
815 tau11 );
816
817 // FLA_Set( FLA_ONE, inv_tau11 );
818 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
819 // FLA_Copy( inv_tau11, minus_inv_tau11 );
820 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
823
824 // FLA_Copy( a21_t, first_elem );
825 // FLA_Set( FLA_ONE, a21_t );
826 first_elem = *a21_t;
827 *a21_t = *buff_1;
828
829 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
832 m_ahead,
833 n_ahead,
834 buff_1,
835 A22, rs_A, cs_A,
836 a21, rs_A,
837 buff_0,
838 y2, inc_y );
839
840 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
843 m_ahead,
844 n_ahead,
845 buff_1,
846 A22, rs_A, cs_A,
847 a21, rs_A,
848 buff_0,
849 z2, inc_z );
850
851 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
852 // FLA_Inv_scal( FLA_TWO, beta );
853 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
855 m_ahead,
856 a21, rs_A,
857 z2, inc_z,
858 &beta );
861
862 // FLA_Scal( minus_inv_tau11, conj_beta );
863 // FLA_Axpy( conj_beta, a21, y2 );
864 // FLA_Scal( inv_tau11, y2 );
867 m_ahead,
868 &conj_beta,
869 a21, rs_A,
870 y2, inc_y );
872 m_ahead,
873 &inv_tau11,
874 y2, inc_y );
875
876 // FLA_Scal( minus_inv_tau11, beta );
877 // FLA_Axpy( beta, a21, z2 );
878 // FLA_Scal( inv_tau11, z2 );
881 m_ahead,
882 &beta,
883 a21, rs_A,
884 z2, inc_z );
886 m_ahead,
887 &inv_tau11,
888 z2, inc_z );
889
890 // FLA_Dot( a12t, a21, dot_product );
891 // FLA_Scal( minus_inv_tau11, dot_product );
892 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
894 m_ahead,
895 a12t, cs_A,
896 a21, rs_A,
897 &dot_product );
900 m_ahead,
902 a21, rs_A,
903 a12t, cs_A );
904
905 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
906 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
909 m_behind,
910 n_ahead,
911 buff_1,
912 A02, rs_A, cs_A,
913 a21, rs_A,
914 buff_0,
915 y0, inc_y );
918 m_behind,
919 n_ahead,
921 y0, inc_y,
922 a21, rs_A,
923 A02, rs_A, cs_A );
924
925 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
926 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
929 m_ahead,
930 n_ahead,
931 buff_m1,
932 a21, rs_A,
933 y2, inc_y,
934 A22, rs_A, cs_A );
937 m_ahead,
938 n_ahead,
939 buff_m1,
940 z2, inc_z,
941 a21, rs_A,
942 A22, rs_A, cs_A );
943
944 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
947 m_ahead,
948 n_behind,
949 buff_1,
950 A20, rs_A, cs_A,
951 a21, rs_A,
952 buff_0,
953 t01, rs_T );
954
955 // FLA_Copy( first_elem, a21_t );
956 *a21_t = first_elem;
957 }
958
959 /*------------------------------------------------------------*/
960
961 }
962
963 // FLA_Obj_free( &y );
964 // FLA_Obj_free( &z );
965 FLA_free( buff_y );
966 FLA_free( buff_z );
967
968 return FLA_SUCCESS;
969}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().