libflame revision_anchor
Functions
FLA_Hess_UT_fus_var2.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_ofu_var2()

FLA_Error FLA_Hess_UT_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_ofu_var2( A, T );
16}
FLA_Error FLA_Hess_UT_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_fus_var2.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Hess_UT_step_ofu_var2(), and i.

◆ FLA_Hess_UT_step_ofc_var2()

FLA_Error FLA_Hess_UT_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_T,
int  rs_T,
int  cs_T 
)
497{
502
508 int i;
509
510 // b_alg = FLA_Obj_length( T );
511 int b_alg = m_T;
512
513 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
514 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
515 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
516 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
517 int inc_y = 1;
518 int inc_z = 1;
519
520 for ( i = 0; i < b_alg; ++i )
521 {
522 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
523 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
524 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
525 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
526 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
527
528 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
529 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
530
531 scomplex* y0 = buff_y + (0 )*inc_y;
532 scomplex* y2 = buff_y + (i+1)*inc_y;
533
534 scomplex* z2 = buff_z + (i+1)*inc_z;
535
536 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
537 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
538
539 int m_ahead = m_A - i - 1;
540 int n_ahead = m_A - i - 1;
541 int m_behind = i;
542 int n_behind = i;
543
544 /*------------------------------------------------------------*/
545
546 if ( m_ahead > 0 )
547 {
548 // FLA_Househ2_UT( FLA_LEFT,
549 // a21_t,
550 // a21_b, tau11 );
552 a21_t,
553 a21_b, rs_A,
554 tau11 );
555
556 // FLA_Set( FLA_ONE, inv_tau11 );
557 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
558 // FLA_Copy( inv_tau11, minus_inv_tau11 );
559 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
562
563 // FLA_Copy( a21_t, first_elem );
564 // FLA_Set( FLA_ONE, a21_t );
565 first_elem = *a21_t;
566 *a21_t = *buff_1;
567
568 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
569 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
571 n_ahead,
572 A22, rs_A, cs_A,
573 a21, rs_A,
574 y2, inc_y,
575 z2, inc_z );
576
577 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
578 // FLA_Inv_scal( FLA_TWO, beta );
579 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
581 m_ahead,
582 a21, rs_A,
583 z2, inc_z,
584 &beta );
587
588 // FLA_Scal( minus_inv_tau11, conj_beta );
589 // FLA_Axpy( conj_beta, a21, y2 );
590 // FLA_Scal( inv_tau11, y2 );
593 m_ahead,
594 &conj_beta,
595 a21, rs_A,
596 y2, inc_y );
598 m_ahead,
599 &inv_tau11,
600 y2, inc_y );
601
602 // FLA_Scal( minus_inv_tau11, beta );
603 // FLA_Axpy( beta, a21, z2 );
604 // FLA_Scal( inv_tau11, z2 );
607 m_ahead,
608 &beta,
609 a21, rs_A,
610 z2, inc_z );
612 m_ahead,
613 &inv_tau11,
614 z2, inc_z );
615
616 // FLA_Dot( a12t, a21, dot_product );
617 // FLA_Scal( minus_inv_tau11, dot_product );
618 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
620 m_ahead,
621 a12t, cs_A,
622 a21, rs_A,
623 &dot_product );
626 m_ahead,
628 a21, rs_A,
629 a12t, cs_A );
630
631 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
632 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
635 m_behind,
636 n_ahead,
637 buff_1,
638 A02, rs_A, cs_A,
639 a21, rs_A,
640 buff_0,
641 y0, inc_y );
644 m_behind,
645 n_ahead,
647 y0, inc_y,
648 a21, rs_A,
649 A02, rs_A, cs_A );
650
651 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
652 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
654 n_ahead,
655 buff_m1,
656 a21, rs_A,
657 y2, inc_y,
658 z2, inc_z,
659 a21, rs_A,
660 A22, rs_A, cs_A );
661
662 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
665 m_ahead,
666 n_behind,
667 buff_1,
668 A20, rs_A, cs_A,
669 a21, rs_A,
670 buff_0,
671 t01, rs_T );
672
673 // FLA_Copy( first_elem, a21_t );
674 *a21_t = first_elem;
675 }
676
677 /*------------------------------------------------------------*/
678
679 }
680
681 // FLA_Obj_free( &y );
682 // FLA_Obj_free( &z );
683 FLA_free( buff_y );
684 FLA_free( buff_z );
685
686 return FLA_SUCCESS;
687}
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:241
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofd_var2()

FLA_Error FLA_Hess_UT_step_ofd_var2 ( int  m_A,
int  m_T,
double *buff_A,
int  rs_A,
int  cs_A,
double *buff_T,
int  rs_T,
int  cs_T 
)
299{
300 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
301 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
302 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
304
305 double first_elem;
306 double dot_product;
307 double beta, conj_beta;
308 double inv_tau11;
309 double minus_inv_tau11;
310 int i;
311
312 // b_alg = FLA_Obj_length( T );
313 int b_alg = m_T;
314
315 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
316 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
317 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
318 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
319 int inc_y = 1;
320 int inc_z = 1;
321
322 for ( i = 0; i < b_alg; ++i )
323 {
324 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
325 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
326 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
327 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
328 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
329
330 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
331 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
332
333 double* y0 = buff_y + (0 )*inc_y;
334 double* y2 = buff_y + (i+1)*inc_y;
335
336 double* z2 = buff_z + (i+1)*inc_z;
337
338 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
339 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
340
341 int m_ahead = m_A - i - 1;
342 int n_ahead = m_A - i - 1;
343 int m_behind = i;
344 int n_behind = i;
345
346 /*------------------------------------------------------------*/
347
348 if ( m_ahead > 0 )
349 {
350 // FLA_Househ2_UT( FLA_LEFT,
351 // a21_t,
352 // a21_b, tau11 );
354 a21_t,
355 a21_b, rs_A,
356 tau11 );
357
358 // FLA_Set( FLA_ONE, inv_tau11 );
359 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
360 // FLA_Copy( inv_tau11, minus_inv_tau11 );
361 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
364
365 // FLA_Copy( a21_t, first_elem );
366 // FLA_Set( FLA_ONE, a21_t );
367 first_elem = *a21_t;
368 *a21_t = *buff_1;
369
370 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
371 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
373 n_ahead,
374 A22, rs_A, cs_A,
375 a21, rs_A,
376 y2, inc_y,
377 z2, inc_z );
378
379 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
380 // FLA_Inv_scal( FLA_TWO, beta );
381 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
383 m_ahead,
384 a21, rs_A,
385 z2, inc_z,
386 &beta );
389
390 // FLA_Scal( minus_inv_tau11, conj_beta );
391 // FLA_Axpy( conj_beta, a21, y2 );
392 // FLA_Scal( inv_tau11, y2 );
395 m_ahead,
396 &conj_beta,
397 a21, rs_A,
398 y2, inc_y );
400 m_ahead,
401 &inv_tau11,
402 y2, inc_y );
403
404 // FLA_Scal( minus_inv_tau11, beta );
405 // FLA_Axpy( beta, a21, z2 );
406 // FLA_Scal( inv_tau11, z2 );
409 m_ahead,
410 &beta,
411 a21, rs_A,
412 z2, inc_z );
414 m_ahead,
415 &inv_tau11,
416 z2, inc_z );
417
418 // FLA_Dot( a12t, a21, dot_product );
419 // FLA_Scal( minus_inv_tau11, dot_product );
420 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
422 m_ahead,
423 a12t, cs_A,
424 a21, rs_A,
425 &dot_product );
428 m_ahead,
430 a21, rs_A,
431 a12t, cs_A );
432
433 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
434 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
437 m_behind,
438 n_ahead,
439 buff_1,
440 A02, rs_A, cs_A,
441 a21, rs_A,
442 buff_0,
443 y0, inc_y );
446 m_behind,
447 n_ahead,
449 y0, inc_y,
450 a21, rs_A,
451 A02, rs_A, cs_A );
452
453 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
454 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
456 n_ahead,
457 buff_m1,
458 a21, rs_A,
459 y2, inc_y,
460 z2, inc_z,
461 a21, rs_A,
462 A22, rs_A, cs_A );
463
464 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
467 m_ahead,
468 n_behind,
469 buff_1,
470 A20, rs_A, cs_A,
471 a21, rs_A,
472 buff_0,
473 t01, rs_T );
474
475 // FLA_Copy( first_elem, a21_t );
476 *a21_t = first_elem;
477 }
478
479 /*------------------------------------------------------------*/
480
481 }
482
483 // FLA_Obj_free( &y );
484 // FLA_Obj_free( &z );
485 FLA_free( buff_y );
486 FLA_free( buff_z );
487
488 return FLA_SUCCESS;
489}
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofs_var2()

FLA_Error FLA_Hess_UT_step_ofs_var2 ( int  m_A,
int  m_T,
float *buff_A,
int  rs_A,
int  cs_A,
float *buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
112 int i;
113
114 // b_alg = FLA_Obj_length( T );
115 int b_alg = m_T;
116
117 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
119 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121 int inc_y = 1;
122 int inc_z = 1;
123
124 for ( i = 0; i < b_alg; ++i )
125 {
126 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131
132 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134
135 float* y0 = buff_y + (0 )*inc_y;
136 float* y2 = buff_y + (i+1)*inc_y;
137
138 float* z2 = buff_z + (i+1)*inc_z;
139
140 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142
143 int m_ahead = m_A - i - 1;
144 int n_ahead = m_A - i - 1;
145 int m_behind = i;
146 int n_behind = i;
147
148 /*------------------------------------------------------------*/
149
150 if ( m_ahead > 0 )
151 {
152 // FLA_Househ2_UT( FLA_LEFT,
153 // a21_t,
154 // a21_b, tau11 );
156 a21_t,
157 a21_b, rs_A,
158 tau11 );
159
160 // FLA_Set( FLA_ONE, inv_tau11 );
161 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162 // FLA_Copy( inv_tau11, minus_inv_tau11 );
163 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
166
167 // FLA_Copy( a21_t, first_elem );
168 // FLA_Set( FLA_ONE, a21_t );
169 first_elem = *a21_t;
170 *a21_t = *buff_1;
171
172 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
173 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
175 n_ahead,
176 A22, rs_A, cs_A,
177 a21, rs_A,
178 y2, inc_y,
179 z2, inc_z );
180
181 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
182 // FLA_Inv_scal( FLA_TWO, beta );
183 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
185 m_ahead,
186 a21, rs_A,
187 z2, inc_z,
188 &beta );
191
192 // FLA_Scal( minus_inv_tau11, conj_beta );
193 // FLA_Axpy( conj_beta, a21, y2 );
194 // FLA_Scal( inv_tau11, y2 );
197 m_ahead,
198 &conj_beta,
199 a21, rs_A,
200 y2, inc_y );
202 m_ahead,
203 &inv_tau11,
204 y2, inc_y );
205
206 // FLA_Scal( minus_inv_tau11, beta );
207 // FLA_Axpy( beta, a21, z2 );
208 // FLA_Scal( inv_tau11, z2 );
211 m_ahead,
212 &beta,
213 a21, rs_A,
214 z2, inc_z );
216 m_ahead,
217 &inv_tau11,
218 z2, inc_z );
219
220 // FLA_Dot( a12t, a21, dot_product );
221 // FLA_Scal( minus_inv_tau11, dot_product );
222 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
224 m_ahead,
225 a12t, cs_A,
226 a21, rs_A,
227 &dot_product );
230 m_ahead,
232 a21, rs_A,
233 a12t, cs_A );
234
235 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
236 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
239 m_behind,
240 n_ahead,
241 buff_1,
242 A02, rs_A, cs_A,
243 a21, rs_A,
244 buff_0,
245 y0, inc_y );
248 m_behind,
249 n_ahead,
251 y0, inc_y,
252 a21, rs_A,
253 A02, rs_A, cs_A );
254
255 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
256 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
258 n_ahead,
259 buff_m1,
260 a21, rs_A,
261 y2, inc_y,
262 z2, inc_z,
263 a21, rs_A,
264 A22, rs_A, cs_A );
265
266 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
269 m_ahead,
270 n_behind,
271 buff_1,
272 A20, rs_A, cs_A,
273 a21, rs_A,
274 buff_0,
275 t01, rs_T );
276
277 // FLA_Copy( first_elem, a21_t );
278 *a21_t = first_elem;
279 }
280
281 /*------------------------------------------------------------*/
282
283 }
284
285 // FLA_Obj_free( &y );
286 // FLA_Obj_free( &z );
287 FLA_free( buff_y );
288 FLA_free( buff_z );
289
290 return FLA_SUCCESS;
291}
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:116
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofu_var2()

FLA_Error FLA_Hess_UT_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:295
FLA_Error FLA_Hess_UT_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:97
FLA_Error FLA_Hess_UT_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:493
FLA_Error FLA_Hess_UT_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:691
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blf_var2(), and FLA_Hess_UT_ofu_var2().

◆ FLA_Hess_UT_step_ofz_var2()

FLA_Error FLA_Hess_UT_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_T,
int  rs_T,
int  cs_T 
)
695{
700
706 int i;
707
708 // b_alg = FLA_Obj_length( T );
709 int b_alg = m_T;
710
711 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
712 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
713 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
714 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
715 int inc_y = 1;
716 int inc_z = 1;
717
718 for ( i = 0; i < b_alg; ++i )
719 {
720 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
721 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
722 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
723 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
724 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
725
726 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
727 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
728
729 dcomplex* y0 = buff_y + (0 )*inc_y;
730 dcomplex* y2 = buff_y + (i+1)*inc_y;
731
732 dcomplex* z2 = buff_z + (i+1)*inc_z;
733
734 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
735 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
736
737 int m_ahead = m_A - i - 1;
738 int n_ahead = m_A - i - 1;
739 int m_behind = i;
740 int n_behind = i;
741
742 /*------------------------------------------------------------*/
743
744 if ( m_ahead > 0 )
745 {
746 // FLA_Househ2_UT( FLA_LEFT,
747 // a21_t,
748 // a21_b, tau11 );
750 a21_t,
751 a21_b, rs_A,
752 tau11 );
753
754 // FLA_Set( FLA_ONE, inv_tau11 );
755 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
756 // FLA_Copy( inv_tau11, minus_inv_tau11 );
757 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
760
761 // FLA_Copy( a21_t, first_elem );
762 // FLA_Set( FLA_ONE, a21_t );
763 first_elem = *a21_t;
764 *a21_t = *buff_1;
765
766 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
767 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
769 n_ahead,
770 A22, rs_A, cs_A,
771 a21, rs_A,
772 y2, inc_y,
773 z2, inc_z );
774
775 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
776 // FLA_Inv_scal( FLA_TWO, beta );
777 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
779 m_ahead,
780 a21, rs_A,
781 z2, inc_z,
782 &beta );
785
786 // FLA_Scal( minus_inv_tau11, conj_beta );
787 // FLA_Axpy( conj_beta, a21, y2 );
788 // FLA_Scal( inv_tau11, y2 );
791 m_ahead,
792 &conj_beta,
793 a21, rs_A,
794 y2, inc_y );
796 m_ahead,
797 &inv_tau11,
798 y2, inc_y );
799
800 // FLA_Scal( minus_inv_tau11, beta );
801 // FLA_Axpy( beta, a21, z2 );
802 // FLA_Scal( inv_tau11, z2 );
805 m_ahead,
806 &beta,
807 a21, rs_A,
808 z2, inc_z );
810 m_ahead,
811 &inv_tau11,
812 z2, inc_z );
813
814 // FLA_Dot( a12t, a21, dot_product );
815 // FLA_Scal( minus_inv_tau11, dot_product );
816 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
818 m_ahead,
819 a12t, cs_A,
820 a21, rs_A,
821 &dot_product );
824 m_ahead,
826 a21, rs_A,
827 a12t, cs_A );
828
829 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
830 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
833 m_behind,
834 n_ahead,
835 buff_1,
836 A02, rs_A, cs_A,
837 a21, rs_A,
838 buff_0,
839 y0, inc_y );
842 m_behind,
843 n_ahead,
845 y0, inc_y,
846 a21, rs_A,
847 A02, rs_A, cs_A );
848
849 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
850 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
852 n_ahead,
853 buff_m1,
854 a21, rs_A,
855 y2, inc_y,
856 z2, inc_z,
857 a21, rs_A,
858 A22, rs_A, cs_A );
859
860 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
863 m_ahead,
864 n_behind,
865 buff_1,
866 A20, rs_A, cs_A,
867 a21, rs_A,
868 buff_0,
869 t01, rs_T );
870
871 // FLA_Copy( first_elem, a21_t );
872 *a21_t = first_elem;
873 }
874
875 /*------------------------------------------------------------*/
876
877 }
878
879 // FLA_Obj_free( &y );
880 // FLA_Obj_free( &z );
881 FLA_free( buff_y );
882 FLA_free( buff_z );
883
884 return FLA_SUCCESS;
885}
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().