libflame revision_anchor
Functions
FLA_Hess_UT_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Hess_UT_blk_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blf_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blf_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blf_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var5 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var5 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var5 (FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var5 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var5 (FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var5 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var5 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var5 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var5 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Fused_Ahx_Ax_ops_var1 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opd_var1 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
 

Function Documentation

◆ FLA_Fused_Ahx_Ax_opc_var1()

FLA_Error FLA_Fused_Ahx_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_x,
int  inc_x,
scomplex * buff_v,
int  inc_v,
scomplex * buff_w,
int  inc_w 
)
262{
263 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
264 int i;
265
266 bl1_csetv( m_A,
267 buff_0,
268 buff_w, inc_w );
269
270 for ( i = 0; i < n_A; ++i )
271 {
272 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
273 scomplex* nu1 = buff_v + (i )*inc_v;
274 scomplex* x = buff_x;
275 scomplex* chi1 = buff_x + (i )*inc_x;
276 scomplex* w = buff_w;
277
278 /*------------------------------------------------------------*/
279
280 bl1_cdot( BLIS1_CONJUGATE,
281 m_A,
282 a1, rs_A,
283 x, inc_x,
284 nu1 );
285
286 bl1_caxpyv( BLIS1_NO_CONJUGATE,
287 m_A,
288 chi1,
289 a1, rs_A,
290 w, inc_w );
291/*
292 F77_caxpy( &m_A,
293 chi1,
294 a1, &rs_A,
295 w, &inc_w );
296*/
297
298 /*------------------------------------------------------------*/
299
300 }
301
302 return FLA_SUCCESS;
303}
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
int i
Definition bl1_axmyv2.c:145
chi1
Definition bl1_axmyv2.c:366
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), and FLA_Hess_UT_step_ofc_var4().

◆ FLA_Fused_Ahx_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Ax_opd_var1 ( int  m_A,
int  n_A,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_x,
int  inc_x,
double * buff_v,
int  inc_v,
double * buff_w,
int  inc_w 
)
179{
180 double zero = bl1_d0();
181 int i;
182
183 double* restrict w = buff_w;
184 double* restrict x = buff_x;
185
186 double* restrict a1;
187 double* restrict a2;
188 double* restrict nu1;
189 double* restrict nu2;
190 double* restrict chi1;
191 double* restrict chi2;
192
193 int n_run = n_A / 2;
194 int n_left = n_A % 2;
195 int step_a1 = 2*cs_A;
196 int step_nu1 = 2*inc_v;
197 int step_chi1 = 2*inc_x;
198
199 bl1_dsetv( m_A,
200 &zero,
201 buff_w, inc_w );
202
203 a1 = buff_A;
204 a2 = buff_A + cs_A;
205 nu1 = buff_v;
206 nu2 = buff_v + inc_v;
207 chi1 = buff_x;
208 chi2 = buff_x + inc_x;
209
210 for ( i = 0; i < n_run; ++i )
211 {
212 /*------------------------------------------------------------*/
213
214 bl1_ddotv2axpyv2b( m_A,
215 a1, rs_A,
216 a2, rs_A,
217 x, inc_x,
218 chi1,
219 chi2,
220 nu1,
221 nu2,
222 w, inc_w );
223
224 /*------------------------------------------------------------*/
225
226 a1 += step_a1;
227 a2 += step_a1;
228 nu1 += step_nu1;
229 nu2 += step_nu1;
230 chi1 += step_chi1;
231 chi2 += step_chi1;
232 }
233
234 if ( n_left > 0 )
235 {
236 for ( i = 0; i < n_left; ++i )
237 {
238 bl1_ddotaxpy( m_A,
239 a1, rs_A,
240 x, inc_x,
241 chi1,
242 nu1,
243 w, inc_w );
244
245 a1 += rs_A;
246 nu1 += inc_v;
247 chi1 += inc_x;
248 }
249 }
250
251 return FLA_SUCCESS;
252}
int n_left
Definition bl1_axmyv2.c:149
int n_run
Definition bl1_axmyv2.c:148
double *restrict chi2
Definition bl1_axpyv2b.c:142
void bl1_ddotaxpy(int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition bl1_dotaxpy.c:31
void bl1_ddotv2axpyv2b(int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
Definition bl1_dotv2axpyv2b.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39
double bl1_d0(void)
Definition bl1_constants.c:118

References bl1_d0(), bl1_ddotaxpy(), bl1_ddotv2axpyv2b(), bl1_dsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), and FLA_Hess_UT_step_ofd_var4().

◆ FLA_Fused_Ahx_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Ax_ops_var1 ( int  m_A,
int  n_A,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_x,
int  inc_x,
float * buff_v,
int  inc_v,
float * buff_w,
int  inc_w 
)
122{
123 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
124 int i;
125
126 bl1_ssetv( m_A,
127 buff_0,
128 buff_w, inc_w );
129
130 for ( i = 0; i < n_A; ++i )
131 {
132 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
133 float* nu1 = buff_v + (i )*inc_v;
134 float* x = buff_x;
135 float* chi1 = buff_x + (i )*inc_x;
136 float* w = buff_w;
137
138 /*------------------------------------------------------------*/
139
140 bl1_sdot( BLIS1_CONJUGATE,
141 m_A,
142 a1, rs_A,
143 x, inc_x,
144 nu1 );
145/*
146 *nu1 = F77_sdot( &m_A,
147 a1, &rs_A,
148 x, &inc_x );
149*/
150
151 bl1_saxpyv( BLIS1_NO_CONJUGATE,
152 m_A,
153 chi1,
154 a1, rs_A,
155 w, inc_w );
156/*
157 F77_saxpy( &m_A,
158 chi1,
159 a1, &rs_A,
160 w, &inc_w );
161*/
162
163 /*------------------------------------------------------------*/
164
165 }
166
167
168 return FLA_SUCCESS;
169}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), and FLA_Hess_UT_step_ofs_var4().

◆ FLA_Fused_Ahx_Ax_opz_var1()

FLA_Error FLA_Fused_Ahx_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_x,
int  inc_x,
dcomplex * buff_v,
int  inc_v,
dcomplex * buff_w,
int  inc_w 
)
313{
314 dcomplex zero = bl1_z0();
315 int i;
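316
317 dcomplex* restrict w = buff_w;
318 dcomplex* restrict x = buff_x;
319
320 dcomplex* restrict a1;
321 dcomplex* restrict a2;
322 dcomplex* restrict nu1;
323 dcomplex* restrict nu2;
324 dcomplex* restrict chi1;
325 dcomplex* restrict chi2;
326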
327 int n_run = n_A / 2;
328 int n_left = n_A % 2;
329 int step_a1 = 2*cs_A;
330 int step_nu1 = 2*inc_v;
331 int step_chi1 = 2*inc_x;
332
333 bl1_zsetv( m_A,
334 &zero,
335 buff_w, inc_w );
336
337 a1 = buff_A;
338 a2 = buff_A + cs_A;
339 nu1 = buff_v;
340 nu2 = buff_v + inc_v;
341 chi1 = buff_x;
342 chi2 = buff_x + inc_x;
343
344 for ( i = 0; i < n_run; ++i )
345 {
346 /*------------------------------------------------------------*/
347
348/*
349 bl1_zdotaxpy( m_A,
350 a1, rs_A,
351 x, inc_x,
352 chi1,
353 nu1,
354 w, inc_w );
355*/
356
357 bl1_zdotv2axpyv2b( m_A,
358 a1, rs_A,
359 a2, rs_A,
360 x, inc_x,
361 chi1,
362 chi2,
363 nu1,
364 nu2,
365 w, inc_w );
366
367 /*------------------------------------------------------------*/
368
369 a1 += step_a1;
370 a2 += step_a1;
371 nu1 += step_nu1;
372 nu2 += step_nu1;
373 chi1 += step_chi1;
374 chi2 += step_chi1;
375 }
376
377 if ( n_left > 0 )
378 {
379 for ( i = 0; i < n_left; ++i )
380 {
381 bl1_zdotaxpy( m_A,
382 a1, rs_A,
383 x, inc_x,
384 chi1,
385 nu1,
386 w, inc_w );
387
388 a1 += rs_A;
389 nu1 += inc_v;
390 chi1 += inc_x;
391 }
392 }
393
394 return FLA_SUCCESS;
395}
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition bl1_dotaxpy.c:258
void bl1_zdotv2axpyv2b(int n, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *x, int inc_x, dcomplex *kappa1, dcomplex *kappa2, dcomplex *rho1, dcomplex *rho2, dcomplex *w, int inc_w)
Definition bl1_dotv2axpyv2b.c:331
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66
Definition blis_type_defs.h:138

References bl1_z0(), bl1_zdotaxpy(), bl1_zdotv2axpyv2b(), bl1_zsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), and FLA_Hess_UT_step_ofz_var4().

◆ FLA_Fused_Gerc2_Ahx_Ax_opc_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex * buff_alpha,
scomplex * buff_u,
int  inc_u,
scomplex * buff_y,
int  inc_y,
scomplex * buff_z,
int  inc_z,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_x,
int  inc_x,
scomplex * buff_v,
int  inc_v,
scomplex * buff_w,
int  inc_w 
)
337{
339 int i;
340
341 bl1_csetv( m_A,
342 buff_0,
343 buff_w, inc_w );
344
345 for ( i = 0; i < n_A; ++i )
346 {
347 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
348 scomplex* nu1 = buff_v + (i )*inc_v;
349 scomplex* x = buff_x;
350 scomplex* chi1 = buff_x + (i )*inc_x;
351 scomplex* psi1 = buff_y + (i )*inc_y;
352 scomplex* u = buff_u;
354 scomplex* w = buff_w;
355 scomplex* z = buff_z;
361
362 /*------------------------------------------------------------*/
363
366
369
371 m_A,
372 &temp1,
373 u, inc_u,
374 a1, rs_A );
375/*
376 F77_caxpy( &m_A,
377 &temp1,
378 u, &inc_u,
379 a1, &rs_A );
380*/
381
383 m_A,
384 &temp2,
385 z, inc_z,
386 a1, rs_A );
387/*
388 F77_caxpy( &m_A,
389 &temp2,
390 z, &inc_z,
391 a1, &rs_A );
392*/
393
395 m_A,
396 a1, rs_A,
397 x, inc_x,
398 nu1 );
399
401 m_A,
402 chi1,
403 a1, rs_A,
404 w, inc_w );
405/*
406 F77_caxpy( &m_A,
407 chi1,
408 a1, &rs_A,
409 w, &inc_w );
410*/
411
412 /*------------------------------------------------------------*/
413
414 }
415
416 return FLA_SUCCESS;
417}
double *restrict psi1
Definition bl1_axmyv2.c:141
double temp2
Definition bl1_axpyv2b.c:147
double temp1
Definition bl1_axpyv2b.c:146
upsilon1
Definition bl1_axpyv2bdotaxpy.c:225

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofc_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_opd_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1 ( int  m_A,
int  n_A,
double * buff_alpha,
double * buff_u,
int  inc_u,
double * buff_y,
int  inc_y,
double * buff_z,
int  inc_z,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_x,
int  inc_x,
double * buff_v,
int  inc_v,
double * buff_w,
int  inc_w 
)
258{
259 double zero = bl1_d0();
260
261 double* restrict alpha = buff_alpha;
262 double* restrict u = buff_u;
263 double* restrict z = buff_z;
264 double* restrict x = buff_x;
265 double* restrict w = buff_w;
266
267 double* restrict a1;
268 double* restrict nu1;
269 double* restrict chi1;
270 double* restrict psi1;
271 double* restrict upsilon1;
272
273 double alpha_psi1;
274 double alpha_upsilon1;
275
276 int n_run = n_A / 1;
277 //int n_left = n_A % 1;
278 int step_a1 = 1*cs_A;
279 int step_nu1 = 1*inc_v;
280 int step_chi1 = 1*inc_x;
281 int step_psi1 = 1*inc_y;
282 int step_upsilon1 = 1*inc_u;
283 int i;
284
285 bl1_dsetv( m_A,
286 &zero,
287 buff_w, inc_w );
288
289 a1 = buff_A;
290 nu1 = buff_v;
291 chi1 = buff_x;
292 psi1 = buff_y;
294
295 for ( i = 0; i < n_run; ++i )
296 {
297 /*------------------------------------------------------------*/
298
301
303 &alpha_psi1,
304 u, inc_u,
306 z, inc_z,
307 a1, rs_A,
308 x, inc_x,
309 chi1,
310 nu1,
311 w, inc_w );
312
313 /*------------------------------------------------------------*/
314
315 a1 += step_a1;
316 nu1 += step_nu1;
317 chi1 += step_chi1;
318 psi1 += step_psi1;
320 }
321
322 return FLA_SUCCESS;
323}
void bl1_daxpyv2bdotaxpy(int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition bl1_axpyv2bdotaxpy.c:36

References bl1_d0(), bl1_daxpyv2bdotaxpy(), bl1_dsetv(), chi1, i, n_run, psi1, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofd_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_ops_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1 ( int  m_A,
int  n_A,
float * buff_alpha,
float * buff_u,
int  inc_u,
float * buff_y,
int  inc_y,
float * buff_z,
int  inc_z,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_x,
int  inc_x,
float * buff_v,
int  inc_v,
float * buff_w,
int  inc_w 
)
160{
161 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
162 int i;
163
164 bl1_ssetv( m_A,
165 buff_0,
166 buff_w, inc_w );
167
168 for ( i = 0; i < n_A; ++i )
169 {
170 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
171 float* nu1 = buff_v + (i )*inc_v;
172 float* x = buff_x;
173 float* chi1 = buff_x + (i )*inc_x;
174 float* psi1 = buff_y + (i )*inc_y;
175 float* u = buff_u;
176 float* upsilon1 = buff_u + (i )*inc_u;
177 float* w = buff_w;
178 float* z = buff_z;
179 float* alpha = buff_alpha;
180 float temp1;
181 float temp2;
182
183 /*------------------------------------------------------------*/
184
185 // bl1_scopyconj( psi1, &conj_psi1 );
186 // bl1_smult3( alpha, &conj_psi1, &temp1 );
187 temp1 = *alpha * *psi1;
188
189 // bl1_scopyconj( upsilon1, &conj_upsilon1 );
190 // bl1_smult3( alpha, &conj_upsilon1, &temp2 );
191 temp2 = *alpha * *upsilon1;
192
194 m_A,
195 &temp1,
196 u, inc_u,
197 a1, rs_A );
198/*
199 F77_saxpy( &m_A,
200 &temp1,
201 u, &inc_u,
202 a1, &rs_A );
203*/
204
206 m_A,
207 &temp2,
208 z, inc_z,
209 a1, rs_A );
210/*
211 F77_saxpy( &m_A,
212 &temp2,
213 z, &inc_z,
214 a1, &rs_A );
215*/
216
218 m_A,
219 a1, rs_A,
220 x, inc_x,
221 nu1 );
222/*
223 *nu1 = F77_sdot( &m_A,
224 a1, &rs_A,
225 x, &inc_x );
226*/
227
229 m_A,
230 chi1,
231 a1, rs_A,
232 w, inc_w );
233/*
234 F77_saxpy( &m_A,
235 chi1,
236 a1, &rs_A,
237 w, &inc_w );
238*/
239 /*------------------------------------------------------------*/
240
241 }
242
243 return FLA_SUCCESS;
244}

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofs_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_opz_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex * buff_alpha,
dcomplex * buff_u,
int  inc_u,
dcomplex * buff_y,
int  inc_y,
dcomplex * buff_z,
int  inc_z,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_x,
int  inc_x,
dcomplex * buff_v,
int  inc_v,
dcomplex * buff_w,
int  inc_w 
)
431{
432 dcomplex zero = bl1_z0();
433
439
445
450
451 int n_run = n_A / 1;
452 //int n_left = n_A % 1;
453 int step_a1 = 1*cs_A;
454 int step_nu1 = 1*inc_v;
455 int step_chi1 = 1*inc_x;
456 int step_psi1 = 1*inc_y;
457 int step_upsilon1 = 1*inc_u;
458 int i;
459
460 bl1_zsetv( m_A,
461 &zero,
462 buff_w, inc_w );
463
464 a1 = buff_A;
465 nu1 = buff_v;
466 chi1 = buff_x;
467 psi1 = buff_y;
469
470 for ( i = 0; i < n_run; ++i )
471 {
472 /*------------------------------------------------------------*/
473
476
479
480/*
481 bl1_zaxpyv2bdotaxpy( m_A,
482 &temp1,
483 u, inc_u,
484 &temp2,
485 z, inc_z,
486 a1, rs_A,
487 x, inc_x,
488 chi1,
489 nu1,
490 w, inc_w );
491*/
492
494 &temp1,
495 &temp2,
496 u, inc_u,
497 z, inc_z,
498 a1, rs_A );
500 a1, rs_A,
501 x, inc_x,
502 chi1,
503 nu1,
504 w, inc_w );
505
506 /*------------------------------------------------------------*/
507
508 a1 += step_a1;
509 nu1 += step_nu1;
510 chi1 += step_chi1;
511 psi1 += step_psi1;
513 }
514
515 return FLA_SUCCESS;
516}
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition bl1_axpyv2b.c:210

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotaxpy(), bl1_zsetv(), chi1, i, n_run, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofz_var3().

◆ FLA_Fused_Uhu_Yhu_Zhu_opc_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1 ( int  m_U,
int  n_U,
scomplex * buff_delta,
scomplex * buff_U,
int  rs_U,
int  cs_U,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_t,
int  inc_t,
scomplex * buff_u,
int  inc_u,
scomplex * buff_y,
int  inc_y,
scomplex * buff_z,
int  inc_z 
)
408{
409 int i;
410
411 for ( i = 0; i < n_U; ++i )
412 {
413 scomplex* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
414 scomplex* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
415 scomplex* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
417 scomplex* tau1 = buff_t + (i )*inc_t;
418 scomplex* u = buff_u;
419 scomplex* y = buff_y;
420 scomplex* z = buff_z;
424
425 /*------------------------------------------------------------*/
426
428 m_U,
429 u1, rs_U,
430 u, inc_u,
431 &alpha );
432
434 m_U,
435 z1, rs_Z,
436 u, inc_u,
437 &beta );
438
440 m_U,
441 y1, rs_Y,
442 u, inc_u,
443 &gamma );
444
445 *tau1 = alpha;
446
448 bl1_cscals( delta, &beta );
450
452 m_U,
453 &alpha,
454 y1, rs_Y,
455 y, inc_y );
456 //F77_caxpy( &m_U,
457 // &alpha,
458 // y1, &rs_Y,
459 // y, &inc_y );
460
462 m_U,
463 &beta,
464 u1, rs_U,
465 y, inc_y );
466 //F77_caxpy( &m_U,
467 // &beta,
468 // u1, &rs_U,
469 // y, &inc_y );
470
472 m_U,
473 &alpha,
474 z1, rs_Z,
475 z, inc_z );
476 //F77_caxpy( &m_U,
477 // &alpha,
478 // z1, &rs_Z,
479 // z, &inc_z );
480
482 m_U,
483 &gamma,
484 u1, rs_U,
485 z, inc_z );
486 //F77_caxpy( &m_U,
487 // &gamma,
488 // u1, &rs_U,
489 // z, &inc_z );
490
491 /*------------------------------------------------------------*/
492
493 }
494
495 return FLA_SUCCESS;
496}
double *restrict z1
Definition bl1_dotsv2.c:148
double *restrict y1
Definition bl1_dotsv2.c:147

References bl1_caxpyv(), bl1_cdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofc_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_opd_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1 ( int  m_U,
int  n_U,
double * buff_delta,
double * buff_U,
int  rs_U,
int  cs_U,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_t,
int  inc_t,
double * buff_u,
int  inc_u,
double * buff_y,
int  inc_y,
double * buff_z,
int  inc_z 
)
280{
281 double zero = bl1_d0();
282
283 double* restrict delta = buff_delta;
284 double* restrict u = buff_u;
285 double* restrict y = buff_y;
286 double* restrict z = buff_z;
287
288 double* restrict u1;
289 double* restrict y1;
290 double* restrict z1;
291 double* restrict upsilon1;
292 double* restrict tau1;
293
294 double alpha;
295 double beta;
296 double gamma;
297
298 int i;
299
300 int n_run = n_U / 1;
301 //int n_left = n_U % 1;
302 int step_u1 = 1*cs_U;
303 int step_y1 = 1*cs_Y;
304 int step_z1 = 1*cs_Z;
305 int step_upsilon1 = 1*inc_u;
306 int step_tau1 = 1*inc_t;
307
308 u1 = buff_U;
309 y1 = buff_Y;
310 z1 = buff_Z;
312 tau1 = buff_t;
313
314 for ( i = 0; i < n_run; ++i )
315 {
316 /*------------------------------------------------------------*/
317
318/*
319 bl1_ddotsv3( BLIS1_CONJUGATE,
320 m_U,
321 u1, rs_U,
322 z1, rs_Z,
323 y1, rs_Y,
324 u, inc_u,
325 &zero,
326 &alpha,
327 &beta,
328 &gamma );
329
330 *tau1 = alpha;
331
332 bl1_dscals( delta, &alpha );
333 bl1_dscals( delta, &beta );
334 bl1_dscals( delta, &gamma );
335
336 bl1_daxpyv2b( m_U,
337 &alpha,
338 &beta,
339 y1, rs_Y,
340 u1, rs_U,
341 y, inc_y );
342 bl1_daxpyv2b( m_U,
343 &alpha,
344 &gamma,
345 z1, rs_Z,
346 u1, rs_U,
347 z, inc_z );
348*/
349
351 m_U,
352 y1, rs_Y,
353 z1, rs_Z,
354 u, inc_u,
355 &zero,
356 &beta,
357 &gamma );
358
360 &gamma,
361 &beta,
362 u1, rs_U,
363 u, inc_u,
364 &alpha,
365 y, inc_y,
366 z, inc_z );
367
368 *tau1 = alpha;
369
372 m_U,
373 &alpha,
374 y1, rs_Y,
375 y, inc_y );
377 m_U,
378 &alpha,
379 z1, rs_Z,
380 z, inc_z );
381
382
383 /*------------------------------------------------------------*/
384
385 u1 += step_u1;
386 y1 += step_y1;
387 z1 += step_z1;
389 tau1 += step_tau1;
390 }
391
392
393 return FLA_SUCCESS;
394}
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddotaxmyv2(int n, double *alpha, double *beta, double *x, int inc_x, double *u, int inc_u, double *rho, double *y, int inc_y, double *z, int inc_z)
Definition bl1_dotaxmyv2.c:34
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition bl1_dotsv2.c:35

References bl1_d0(), bl1_daxpyv(), bl1_ddotaxmyv2(), bl1_ddotsv2(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_run, upsilon1, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofd_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_ops_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1 ( int  m_U,
int  n_U,
float * buff_delta,
float * buff_U,
int  rs_U,
int  cs_U,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_t,
int  inc_t,
float * buff_u,
int  inc_u,
float * buff_y,
int  inc_y,
float * buff_z,
int  inc_z 
)
166{
167 int i;
168
169 for ( i = 0; i < n_U; ++i )
170 {
171 float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
172 float* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
173 float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
174 float* delta = buff_delta;
175 float* tau1 = buff_t + (i )*inc_t;
176 float* u = buff_u;
177 float* y = buff_y;
178 float* z = buff_z;
179 float alpha;
180 float beta;
181 float gamma;
182
183 /*------------------------------------------------------------*/
184
186 m_U,
187 u1, rs_U,
188 u, inc_u,
189 &alpha );
190 //alpha = F77_sdot( &m_U,
191 // u1, &rs_U,
192 // u, &inc_u );
193
195 m_U,
196 z1, rs_Z,
197 u, inc_u,
198 &beta );
199 //beta = F77_sdot( &m_U,
200 // z1, &rs_Z,
201 // u, &inc_u );
202
204 m_U,
205 y1, rs_Y,
206 u, inc_u,
207 &gamma );
208 //gamma = F77_sdot( &m_U,
209 // y1, &rs_Y,
210 // u, &inc_u );
211
212 *tau1 = alpha;
213
214 // bl1_sscals( delta, &alpha );
215 // bl1_sscals( delta, &beta );
216 // bl1_sscals( delta, &gamma );
217 alpha *= *delta;
218 beta *= *delta;
219 gamma *= *delta;
220
222 m_U,
223 &alpha,
224 y1, rs_Y,
225 y, inc_y );
226 //F77_saxpy( &m_U,
227 // &alpha,
228 // y1, &rs_Y,
229 // y, &inc_y );
230
232 m_U,
233 &beta,
234 u1, rs_U,
235 y, inc_y );
236 //F77_saxpy( &m_U,
237 // &beta,
238 // u1, &rs_U,
239 // y, &inc_y );
240
242 m_U,
243 &alpha,
244 z1, rs_Z,
245 z, inc_z );
246 //F77_saxpy( &m_U,
247 // &alpha,
248 // z1, &rs_Z,
249 // z, &inc_z );
250
252 m_U,
253 &gamma,
254 u1, rs_U,
255 z, inc_z );
256 //F77_saxpy( &m_U,
257 // &gamma,
258 // u1, &rs_U,
259 // z, &inc_z );
260
261 /*------------------------------------------------------------*/
262
263 }
264
265 return FLA_SUCCESS;
266}

References bl1_saxpyv(), bl1_sdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofs_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_opz_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1 ( int  m_U,
int  n_U,
dcomplex *buff_delta,
dcomplex *buff_U,
int  rs_U,
int  cs_U,
dcomplex *buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *buff_t,
int  inc_t,
dcomplex *buff_u,
int  inc_u,
dcomplex *buff_y,
int  inc_y,
dcomplex *buff_z,
int  inc_z 
)
510{
511 dcomplex zero = bl1_z0();
512
517
523
527
528 int i;
529
530 int n_run = n_U / 1;
531 //int n_left = n_U % 1;
532 int step_u1 = 1*cs_U;
533 int step_y1 = 1*cs_Y;
534 int step_z1 = 1*cs_Z;
535 int step_upsilon1 = 1*inc_u;
536 int step_tau1 = 1*inc_t;
537
538 u1 = buff_U;
539 y1 = buff_Y;
540 z1 = buff_Z;
542 tau1 = buff_t;
543
544 for ( i = 0; i < n_run; ++i )
545 {
546 /*------------------------------------------------------------*/
547
548
550 m_U,
551 u1, rs_U,
552 z1, rs_Z,
553 y1, rs_Y,
554 u, inc_u,
555 &zero,
556 &alpha,
557 &beta,
558 &gamma );
559
560 *tau1 = alpha;
561
563 bl1_zscals( delta, &beta );
565
567 &alpha,
568 &beta,
569 y1, rs_Y,
570 u1, rs_U,
571 y, inc_y );
573 &alpha,
574 &gamma,
575 z1, rs_Z,
576 u1, rs_U,
577 z, inc_z );
578
579
580/*
581 bl1_zdotsv2( BLIS1_CONJUGATE,
582 m_U,
583 y1, rs_Y,
584 z1, rs_Z,
585 u, inc_u,
586 &zero,
587 &beta,
588 &gamma );
589
590 bl1_zdotaxmyv2( m_U,
591 &gamma,
592 &beta,
593 u1, rs_U,
594 u, inc_u,
595 &alpha,
596 y, inc_y,
597 z, inc_z );
598
599 *tau1 = alpha;
600
601 bl1_zscals( delta, &alpha );
602 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
603 m_U,
604 &alpha,
605 y1, rs_Y,
606 y, inc_y );
607 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
608 m_U,
609 &alpha,
610 z1, rs_Z,
611 z, inc_z );
612*/
613
614 /*------------------------------------------------------------*/
615
616 u1 += step_u1;
617 y1 += step_y1;
618 z1 += step_z1;
620 tau1 += step_tau1;
621 }
622
623 return FLA_SUCCESS;
624}
bl1_zscals(beta, rho_yz)
void bl1_zdotsv3(conj1_t conjxyw, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *w, int inc_w, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz, dcomplex *rho_wz)
Definition bl1_dotsv3.c:290

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotsv3(), bl1_zscals(), BLIS1_CONJUGATE, i, n_run, upsilon1, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofz_var4().

◆ FLA_Hess_UT_blf_var2()

FLA_Error FLA_Hess_UT_blf_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, T2;
25
26 FLA_Obj U, Z;
33 UB_bl;
35 dim_t m_A;
36 dim_t b_alg, b, bb;
37
39
41 m_A = FLA_Obj_length( A );
42
43 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45
46 FLA_Part_2x2( A, &ATL, &ATR,
47 &ABL, &ABR, 0, 0, FLA_TL );
48 FLA_Part_2x1( U, &UT,
49 &UB, 0, FLA_TOP );
50 FLA_Part_2x1( Z, &ZT,
51 &ZB, 0, FLA_TOP );
52 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53
54 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55 {
56 b = min( FLA_Obj_length( ABR ), b_alg );
57
58 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59 /* ************* */ /* ******************** */
60 &A10, /**/ &A11, &A12,
61 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62 b, b, FLA_BR );
64 /* ** */ /* ** */
65 &U1,
66 UB, &U2, b, FLA_BOTTOM );
68 /* ** */ /* ** */
69 &Z1,
70 ZB, &Z2, b, FLA_BOTTOM );
71 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72 b, FLA_RIGHT );
73
74 /*------------------------------------------------------------*/
75
77 &none2, &none3, b, b, FLA_TL );
78
79 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80
83
84 // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var2( ABR, T1, b );
85 //FLA_Hess_UT_step_unb_var2( ABR, T1_tl );
87 //FLA_Hess_UT_step_opt_var2( ABR, T1_tl );
88
89 // Build UB from ABR, with explicit unit subdiagonal and zeros.
92 &UB_bl, 1, FLA_TOP );
95
96 // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97 if ( FLA_Obj_length( ATR ) > 0 )
98 {
99 // NOTE: We use ZT as temporary workspace.
102 &none2, &none3, bb, bb, FLA_TL );
103
104 // WT_l = ATR * UB_l * inv( triu( T1 ) ).
109
110 // ATR = ATR - WT_l * UB_l'
113 }
114
115 /*------------------------------------------------------------*/
116
117 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118 A10, A11, /**/ A12,
119 /* ************** */ /* ****************** */
120 &ABL, /**/ &ABR, A20, A21, /**/ A22,
121 FLA_TL );
123 U1,
124 /* ** */ /* ** */
125 &UB, U2, FLA_TOP );
127 Z1,
128 /* ** */ /* ** */
129 &ZB, Z2, FLA_TOP );
130 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131 FLA_LEFT );
132 }
133
134 FLA_Obj_free( &U );
135 FLA_Obj_free( &Z );
136
137 return FLA_SUCCESS;
138}
FLA_Error FLA_Hess_UT_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_fus_var2.c:18
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy_external.c:13
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition FLA_Gemm_external.c:13
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Trsm_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition FLA_Obj.c:55
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
unsigned long dim_t
Definition FLA_type_defs.h:71
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition FLA_Triangularize.c:13
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Set.c:13
Definition FLA_type_defs.h:159

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_ofu_var2(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blf_var3()

FLA_Error FLA_Hess_UT_blf_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, T2;
25
26 FLA_Obj U, Z;
33 UB_bl;
35 dim_t m_A;
36 dim_t b_alg, b, bb;
37
39
41 m_A = FLA_Obj_length( A );
42
43 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45
46 FLA_Part_2x2( A, &ATL, &ATR,
47 &ABL, &ABR, 0, 0, FLA_TL );
48 FLA_Part_2x1( U, &UT,
49 &UB, 0, FLA_TOP );
50 FLA_Part_2x1( Z, &ZT,
51 &ZB, 0, FLA_TOP );
52 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53
54 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55 {
56 b = min( FLA_Obj_length( ABR ), b_alg );
57
58 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59 /* ************* */ /* ******************** */
60 &A10, /**/ &A11, &A12,
61 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62 b, b, FLA_BR );
64 /* ** */ /* ** */
65 &U1,
66 UB, &U2, b, FLA_BOTTOM );
68 /* ** */ /* ** */
69 &Z1,
70 ZB, &Z2, b, FLA_BOTTOM );
71 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72 b, FLA_RIGHT );
73
74 /*------------------------------------------------------------*/
75
77 &none2, &none3, b, b, FLA_TL );
78
79 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80
83
84 // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var3( ABR, T1, b );
85 //FLA_Hess_UT_step_unb_var3( ABR, T1_tl );
87 //FLA_Hess_UT_step_opt_var3( ABR, T1_tl );
88
89 // Build UB from ABR, with explicit unit subdiagonal and zeros.
92 &UB_bl, 1, FLA_TOP );
95
96 // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97 if ( FLA_Obj_length( ATR ) > 0 )
98 {
99 // NOTE: We use ZT as temporary workspace.
102 &none2, &none3, bb, bb, FLA_TL );
103
104 // WT_l = ATR * UB_l * inv( triu( T1 ) ).
109
110 // ATR = ATR - WT_l * UB_l'
113 }
114
115 /*------------------------------------------------------------*/
116
117 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118 A10, A11, /**/ A12,
119 /* ************** */ /* ****************** */
120 &ABL, /**/ &ABR, A20, A21, /**/ A22,
121 FLA_TL );
123 U1,
124 /* ** */ /* ** */
125 &UB, U2, FLA_TOP );
127 Z1,
128 /* ** */ /* ** */
129 &ZB, Z2, FLA_TOP );
130 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131 FLA_LEFT );
132 }
133
134 FLA_Obj_free( &U );
135 FLA_Obj_free( &Z );
136
137 return FLA_SUCCESS;
138}
FLA_Error FLA_Hess_UT_step_ofu_var3(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_fus_var3.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_ofu_var3(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blf_var4()

FLA_Error FLA_Hess_UT_blf_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj YT, Y0,
22 YB, Y1,
23 Y2;
24 FLA_Obj ZT, Z0,
25 ZB, Z1,
26 Z2;
27 FLA_Obj TL, TR, T0, T1, T2;
28
29 FLA_Obj U, Y, Z;
38 UB_bl;
40 dim_t m_A;
41 dim_t b_alg, b, bb;
42
44
46 m_A = FLA_Obj_length( A );
47
48 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
49 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Y );
50 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
51
52 FLA_Part_2x2( A, &ATL, &ATR,
53 &ABL, &ABR, 0, 0, FLA_TL );
54 FLA_Part_2x1( U, &UT,
55 &UB, 0, FLA_TOP );
56 FLA_Part_2x1( Y, &YT,
57 &YB, 0, FLA_TOP );
58 FLA_Part_2x1( Z, &ZT,
59 &ZB, 0, FLA_TOP );
60 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
61
62 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
63 {
64 b = min( FLA_Obj_length( ABR ), b_alg );
65
66 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
67 /* ************* */ /* ******************** */
68 &A10, /**/ &A11, &A12,
69 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
70 b, b, FLA_BR );
72 /* ** */ /* ** */
73 &U1,
74 UB, &U2, b, FLA_BOTTOM );
76 /* ** */ /* ** */
77 &Y1,
78 YB, &Y2, b, FLA_BOTTOM );
80 /* ** */ /* ** */
81 &Z1,
82 ZB, &Z2, b, FLA_BOTTOM );
83 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
84 b, FLA_RIGHT );
85
86 /*------------------------------------------------------------*/
87
89 &none2, &none3, b, b, FLA_TL );
90
91 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
92
97
99 &U2_l, b, FLA_TOP );
101 &Y2_l, b, FLA_TOP );
103 &Z2_l, b, FLA_TOP );
104
105 // [ ABR, YB, ZB, T1 ] = FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1, b );
106 //FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1_tl );
108 //FLA_Hess_UT_step_opt_var4( ABR, YB, ZB, T1_tl );
109
110 // Build UB from ABR, with explicit unit subdiagonal and zeros.
113 &UB_bl, 1, FLA_TOP );
116
117 // ATR = ATR - ATR * UB * inv( triu( T ) ) * UB' );
118 if ( FLA_Obj_length( ATR ) > 0 )
119 {
120 // NOTE: We use ZT as temporary workspace.
123 &none2, &none3, bb, bb, FLA_TL );
124
125 // WT_l = ATR * UB_l * inv( triu( T ) ).
130
131 // ATR = ATR - WT_l * UB_l'
134 }
135
136 // A22 = A22 - U2 * Y2' - Z2 * U2';
141
142 /*------------------------------------------------------------*/
143
144 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
145 A10, A11, /**/ A12,
146 /* ************** */ /* ****************** */
147 &ABL, /**/ &ABR, A20, A21, /**/ A22,
148 FLA_TL );
150 U1,
151 /* ** */ /* ** */
152 &UB, U2, FLA_TOP );
154 Y1,
155 /* ** */ /* ** */
156 &YB, Y2, FLA_TOP );
158 Z1,
159 /* ** */ /* ** */
160 &ZB, Z2, FLA_TOP );
161 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
162 FLA_LEFT );
163 }
164
165 FLA_Obj_free( &U );
166 FLA_Obj_free( &Y );
167 FLA_Obj_free( &Z );
168
169 return FLA_SUCCESS;
170}
FLA_Error FLA_Hess_UT_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition FLA_Hess_UT_fus_var4.c:29

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_ofu_var4(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var1()

FLA_Error FLA_Hess_UT_blk_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, T2;
25
26 FLA_Obj U, Z;
33 UB_bl;
35 dim_t m_A;
36 dim_t b_alg, b, bb;
37
39
41 m_A = FLA_Obj_length( A );
42
43 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45
46 FLA_Part_2x2( A, &ATL, &ATR,
47 &ABL, &ABR, 0, 0, FLA_TL );
48 FLA_Part_2x1( U, &UT,
49 &UB, 0, FLA_TOP );
50 FLA_Part_2x1( Z, &ZT,
51 &ZB, 0, FLA_TOP );
52 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53
54 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55 {
56 b = min( FLA_Obj_length( ABR ), b_alg );
57
58 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59 /* ************* */ /* ******************** */
60 &A10, /**/ &A11, &A12,
61 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62 b, b, FLA_BR );
64 /* ** */ /* ** */
65 &U1,
66 UB, &U2, b, FLA_BOTTOM );
68 /* ** */ /* ** */
69 &Z1,
70 ZB, &Z2, b, FLA_BOTTOM );
71 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72 b, FLA_RIGHT );
73
74 /*------------------------------------------------------------*/
75
77 &none2, &none3, b, b, FLA_TL );
78
79 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80
83
84 // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var1( ABR, T1, b );
85 //FLA_Hess_UT_step_unb_var1( ABR, T1_tl );
87
88 // Build UB from ABR, with explicit unit subdiagonal and zeros.
91 &UB_bl, 1, FLA_TOP );
94
95 // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
96 if ( FLA_Obj_length( ATR ) > 0 )
97 {
98 // NOTE: We use ZT as temporary workspace.
101 &none2, &none3, bb, bb, FLA_TL );
102
103 // WT_l = ATR * UB_l * inv( triu( T1 ) ).
108
109 // ATR = ATR - WT_l * UB_l'
112 }
113
114 /*------------------------------------------------------------*/
115
116 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
117 A10, A11, /**/ A12,
118 /* ************** */ /* ****************** */
119 &ABL, /**/ &ABR, A20, A21, /**/ A22,
120 FLA_TL );
122 U1,
123 /* ** */ /* ** */
124 &UB, U2, FLA_TOP );
126 Z1,
127 /* ** */ /* ** */
128 &ZB, Z2, FLA_TOP );
129 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
130 FLA_LEFT );
131 }
132
133 FLA_Obj_free( &U );
134 FLA_Obj_free( &Z );
135
136 return FLA_SUCCESS;
137}
FLA_Error FLA_Hess_UT_step_opt_var1(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_opt_var1.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var2()

FLA_Error FLA_Hess_UT_blk_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, T2;
25
26 FLA_Obj U, Z;
33 UB_bl;
35 dim_t m_A;
36 dim_t b_alg, b, bb;
37
39
41 m_A = FLA_Obj_length( A );
42
43 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45
46 FLA_Part_2x2( A, &ATL, &ATR,
47 &ABL, &ABR, 0, 0, FLA_TL );
48 FLA_Part_2x1( U, &UT,
49 &UB, 0, FLA_TOP );
50 FLA_Part_2x1( Z, &ZT,
51 &ZB, 0, FLA_TOP );
52 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53
54 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55 {
56 b = min( FLA_Obj_length( ABR ), b_alg );
57
58 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59 /* ************* */ /* ******************** */
60 &A10, /**/ &A11, &A12,
61 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62 b, b, FLA_BR );
64 /* ** */ /* ** */
65 &U1,
66 UB, &U2, b, FLA_BOTTOM );
68 /* ** */ /* ** */
69 &Z1,
70 ZB, &Z2, b, FLA_BOTTOM );
71 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72 b, FLA_RIGHT );
73
74 /*------------------------------------------------------------*/
75
77 &none2, &none3, b, b, FLA_TL );
78
79 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80
83
84 // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var2( ABR, T1, b );
85 //FLA_Hess_UT_step_unb_var2( ABR, T1_tl );
86 //FLA_Hess_UT_step_ofu_var2( ABR, T1_tl );
88
89 // Build UB from ABR, with explicit unit subdiagonal and zeros.
92 &UB_bl, 1, FLA_TOP );
95
96 // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97 if ( FLA_Obj_length( ATR ) > 0 )
98 {
99 // NOTE: We use ZT as temporary workspace.
102 &none2, &none3, bb, bb, FLA_TL );
103
104 // WT_l = ATR * UB_l * inv( triu( T1 ) ).
109
110 // ATR = ATR - WT_l * UB_l'
113 }
114
115 /*------------------------------------------------------------*/
116
117 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118 A10, A11, /**/ A12,
119 /* ************** */ /* ****************** */
120 &ABL, /**/ &ABR, A20, A21, /**/ A22,
121 FLA_TL );
123 U1,
124 /* ** */ /* ** */
125 &UB, U2, FLA_TOP );
127 Z1,
128 /* ** */ /* ** */
129 &ZB, Z2, FLA_TOP );
130 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131 FLA_LEFT );
132 }
133
134 FLA_Obj_free( &U );
135 FLA_Obj_free( &Z );
136
137 return FLA_SUCCESS;
138}
FLA_Error FLA_Hess_UT_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_opt_var2.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var2(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var3()

FLA_Error FLA_Hess_UT_blk_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, T2;
25
26 FLA_Obj U, Z;
33 UB_bl;
35 dim_t m_A;
36 dim_t b_alg, b, bb;
37
39
41 m_A = FLA_Obj_length( A );
42
43 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45
46 FLA_Part_2x2( A, &ATL, &ATR,
47 &ABL, &ABR, 0, 0, FLA_TL );
48 FLA_Part_2x1( U, &UT,
49 &UB, 0, FLA_TOP );
50 FLA_Part_2x1( Z, &ZT,
51 &ZB, 0, FLA_TOP );
52 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53
54 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55 {
56 b = min( FLA_Obj_length( ABR ), b_alg );
57
58 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59 /* ************* */ /* ******************** */
60 &A10, /**/ &A11, &A12,
61 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62 b, b, FLA_BR );
64 /* ** */ /* ** */
65 &U1,
66 UB, &U2, b, FLA_BOTTOM );
68 /* ** */ /* ** */
69 &Z1,
70 ZB, &Z2, b, FLA_BOTTOM );
71 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72 b, FLA_RIGHT );
73
74 /*------------------------------------------------------------*/
75
77 &none2, &none3, b, b, FLA_TL );
78
79 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80
83
84 // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var3( ABR, T1, b );
85 //FLA_Hess_UT_step_unb_var3( ABR, T1_tl );
86 //FLA_Hess_UT_step_ofu_var3( ABR, T1_tl );
88
89 // Build UB from ABR, with explicit unit subdiagonal and zeros.
92 &UB_bl, 1, FLA_TOP );
95
96 // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97 if ( FLA_Obj_length( ATR ) > 0 )
98 {
99 // NOTE: We use ZT as temporary workspace.
102 &none2, &none3, bb, bb, FLA_TL );
103
104 // WT_l = ATR * UB_l * inv( triu( T1 ) ).
109
110 // ATR = ATR - WT_l * UB_l'
113 }
114
115 /*------------------------------------------------------------*/
116
117 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118 A10, A11, /**/ A12,
119 /* ************** */ /* ****************** */
120 &ABL, /**/ &ABR, A20, A21, /**/ A22,
121 FLA_TL );
123 U1,
124 /* ** */ /* ** */
125 &UB, U2, FLA_TOP );
127 Z1,
128 /* ** */ /* ** */
129 &ZB, Z2, FLA_TOP );
130 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131 FLA_LEFT );
132 }
133
134 FLA_Obj_free( &U );
135 FLA_Obj_free( &Z );
136
137 return FLA_SUCCESS;
138}
FLA_Error FLA_Hess_UT_step_opt_var3(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_opt_var3.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var3(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var4()

FLA_Error FLA_Hess_UT_blk_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj YT, Y0,
22 YB, Y1,
23 Y2;
24 FLA_Obj ZT, Z0,
25 ZB, Z1,
26 Z2;
27 FLA_Obj TL, TR, T0, T1, T2;
28
29 FLA_Obj U, Y, Z;
38 UB_bl;
40 dim_t m_A;
41 dim_t b_alg, b, bb;
42
44
46 m_A = FLA_Obj_length( A );
47
48 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
49 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Y );
50 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
51
52 FLA_Part_2x2( A, &ATL, &ATR,
53 &ABL, &ABR, 0, 0, FLA_TL );
54 FLA_Part_2x1( U, &UT,
55 &UB, 0, FLA_TOP );
56 FLA_Part_2x1( Y, &YT,
57 &YB, 0, FLA_TOP );
58 FLA_Part_2x1( Z, &ZT,
59 &ZB, 0, FLA_TOP );
60 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
61
62 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
63 {
64 b = min( FLA_Obj_length( ABR ), b_alg );
65
66 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
67 /* ************* */ /* ******************** */
68 &A10, /**/ &A11, &A12,
69 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
70 b, b, FLA_BR );
72 /* ** */ /* ** */
73 &U1,
74 UB, &U2, b, FLA_BOTTOM );
76 /* ** */ /* ** */
77 &Y1,
78 YB, &Y2, b, FLA_BOTTOM );
80 /* ** */ /* ** */
81 &Z1,
82 ZB, &Z2, b, FLA_BOTTOM );
83 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
84 b, FLA_RIGHT );
85
86 /*------------------------------------------------------------*/
87
89 &none2, &none3, b, b, FLA_TL );
90
91 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
92
97
99 &U2_l, b, FLA_TOP );
101 &Y2_l, b, FLA_TOP );
103 &Z2_l, b, FLA_TOP );
104
105 // [ ABR, YB, ZB, T1 ] = FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1, b );
106 //FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1_tl );
107 //FLA_Hess_UT_step_ofu_var4( ABR, YB, ZB, T1_tl );
109
110 // Build UB from ABR, with explicit unit subdiagonal and zeros.
113 &UB_bl, 1, FLA_TOP );
116
117 // ATR = ATR - ATR * UB * inv( triu( T ) ) * UB' );
118 if ( FLA_Obj_length( ATR ) > 0 )
119 {
120 // NOTE: We use ZT as temporary workspace.
123 &none2, &none3, bb, bb, FLA_TL );
124
125 // WT_l = ATR * UB_l * inv( triu( T ) ).
130
131 // ATR = ATR - WT_l * UB_l'
134 }
135
136 // A22 = A22 - U2 * Y2' - Z2 * U2';
141
142 /*------------------------------------------------------------*/
143
144 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
145 A10, A11, /**/ A12,
146 /* ************** */ /* ****************** */
147 &ABL, /**/ &ABR, A20, A21, /**/ A22,
148 FLA_TL );
150 U1,
151 /* ** */ /* ** */
152 &UB, U2, FLA_TOP );
154 Y1,
155 /* ** */ /* ** */
156 &YB, Y2, FLA_TOP );
158 Z1,
159 /* ** */ /* ** */
160 &ZB, Z2, FLA_TOP );
161 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
162 FLA_LEFT );
163 }
164
165 FLA_Obj_free( &U );
166 FLA_Obj_free( &Y );
167 FLA_Obj_free( &Z );
168
169 return FLA_SUCCESS;
170}
FLA_Error FLA_Hess_UT_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition FLA_Hess_UT_opt_var4.c:29

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var4(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var5()

FLA_Error FLA_Hess_UT_blk_var5 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, W12;
25
26 FLA_Obj U, Z;
33 dim_t m_A;
34 dim_t b_alg, b, bb;
35
37
39 m_A = FLA_Obj_length( A );
40
41 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
42 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
43
44 FLA_Part_2x2( A, &ATL, &ATR,
45 &ABL, &ABR, 0, 0, FLA_TL );
46 FLA_Part_2x1( U, &UT,
47 &UB, 0, FLA_TOP );
48 FLA_Part_2x1( Z, &ZT,
49 &ZB, 0, FLA_TOP );
50 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
51
52 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
53 {
54 b = min( FLA_Obj_length( ABR ), b_alg );
55
56 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
57 /* ************* */ /* ******************** */
58 &A10, /**/ &A11, &A12,
59 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
60 b, b, FLA_BR );
62 /* ** */ /* ** */
63 &U1,
64 UB, &U2, b, FLA_BOTTOM );
66 /* ** */ /* ** */
67 &Z1,
68 ZB, &Z2, b, FLA_BOTTOM );
69 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
70 b, FLA_RIGHT );
71
72 /*------------------------------------------------------------*/
73
75 &none2, &none3, b, b, FLA_TL );
76
77 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
78
81
82 // [ ABR, UB, ZB, T1 ] = FLA_Hess_UT_step_unb_var5( ABR, UB, ZB, T1, b );
83 //FLA_Hess_UT_step_unb_var5( ABR, UB, ZB, T1_tl );
85
86 // ATR = ATR - ATR * UB * inv( triu ( T1 ) ) * UB' );
87 if ( FLA_Obj_length( ATR ) > 0 )
88 {
89 // NOTE: We use ZT as temporary workspace.
92 &none2, &none3, bb, bb, FLA_TL );
93
94 // WT_l = ATR * UB_l * inv( triu( T1 ) ).
99
100 // ATR = ATR - WT_l * UB_l'
103 }
104
105 // / A12 \ = Q11' * / / A12 \ - / Z1 \ * inv( triu( T1 ) ) * U2' \
106 // \ A22 / \ \ A22 / \ Z2 / /
107 //
108 // where Q11 corresponds to the block Householder transformation
109 // associated with UB and T1.
110 if ( FLA_Obj_width( A12 ) > 0 )
111 {
114
115 // NOTE: Since A12.n > 0, we are guaranteed to not be at an edge case,
116 // namely the case where bb = b - 1 = ABR.m - 1, thus we are free to use
117 // the "full" matrix partitions in this scope block (ie: ZB instead of
118 // ZB_l).
119
120 // W12 = U2'
121 // W12 = inv( triu( T1 ) ) * W12;
125
127 A22, &ABR2 );
128
129 // / A12 \ = / A12 \ - / Z1 \ * W12
130 // \ A22 / \ A22 / \ Z2 /
133
134 // Omit the top row of UB so it has [implicit] unit diagonal, allowing us
135 // to use FLA_Apply_Q_UT() to apply the block Householder transformation
136 // corresponding to UB and T1. This trick is valid since the top row of
137 // ABR2 would normally be unchanged by the transformation (ie: multiplied
138 // by identity).
139 FLA_Part_2x1( UB, &none,
140 &UB_b, 1, FLA_TOP );
142 &ABR2_b, 1, FLA_TOP );
143
144 // Apply Q11' to A12 and A22 from the left:
145 //
146 // / A12 \ = / I - / U1 \ * inv( triu( T1 ) ) * / U1 \' \' / A12 \
147 // \ A22 / \ \ U2 / \ U2 / / \ A22 /
148 //
150 UB_b, T1_tl, W12, ABR2_b );
151 }
152
153 /*------------------------------------------------------------*/
154
155 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
156 A10, A11, /**/ A12,
157 /* ************** */ /* ****************** */
158 &ABL, /**/ &ABR, A20, A21, /**/ A22,
159 FLA_TL );
161 U1,
162 /* ** */ /* ** */
163 &UB, U2, FLA_TOP );
165 Z1,
166 /* ** */ /* ** */
167 &ZB, Z2, FLA_TOP );
168 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
169 FLA_LEFT );
170 }
171
172 FLA_Obj_free( &U );
173 FLA_Obj_free( &Z );
174
175 return FLA_SUCCESS;
176}
FLA_Error FLA_Hess_UT_step_opt_var5(FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
Definition FLA_Hess_UT_opt_var5.c:29
FLA_Error FLA_Copyt_external(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition FLA_Copyt_external.c:13
FLA_Error FLA_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition FLA_Apply_Q_UT.c:16
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition FLA_View.c:541

References FLA_Apply_Q_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var5(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_ofu_var1()

FLA_Error FLA_Hess_UT_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Hess_UT_ofu_var2()

FLA_Error FLA_Hess_UT_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_ofu_var2( A, T );
16}

References FLA_Hess_UT_step_ofu_var2(), and i.

◆ FLA_Hess_UT_ofu_var3()

FLA_Error FLA_Hess_UT_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_ofu_var3( A, T );
16}

References FLA_Hess_UT_step_ofu_var3(), and i.

◆ FLA_Hess_UT_ofu_var4()

FLA_Error FLA_Hess_UT_ofu_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Y, Z;
17
20
22
23 FLA_Obj_free( &Y );
24 FLA_Obj_free( &Z );
25
26 return r_val;
27}
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition FLA_Obj.c:286
int FLA_Error
Definition FLA_type_defs.h:47

References FLA_Hess_UT_step_ofu_var4(), FLA_Obj_create_conf_to(), FLA_Obj_free(), and i.

◆ FLA_Hess_UT_opt_var1()

FLA_Error FLA_Hess_UT_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_opt_var1( A, T );
16}

References FLA_Hess_UT_step_opt_var1(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_opt_var2()

FLA_Error FLA_Hess_UT_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_opt_var2( A, T );
16}

References FLA_Hess_UT_step_opt_var2(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_opt_var3()

FLA_Error FLA_Hess_UT_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_opt_var3( A, T );
16}

References FLA_Hess_UT_step_opt_var3(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_opt_var4()

FLA_Error FLA_Hess_UT_opt_var4 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Hess_UT_opt_var5()

FLA_Error FLA_Hess_UT_opt_var5 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Hess_UT_step_ofc_var1()

FLA_Error FLA_Hess_UT_step_ofc_var1 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofc_var2()

FLA_Error FLA_Hess_UT_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
497{
502
508 int i;
509
510 // b_alg = FLA_Obj_length( T );
511 int b_alg = m_T;
512
513 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
514 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
515 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
516 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
517 int inc_y = 1;
518 int inc_z = 1;
519
520 for ( i = 0; i < b_alg; ++i )
521 {
522 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
523 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
524 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
525 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
526 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
527
528 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
529 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
530
531 scomplex* y0 = buff_y + (0 )*inc_y;
532 scomplex* y2 = buff_y + (i+1)*inc_y;
533
534 scomplex* z2 = buff_z + (i+1)*inc_z;
535
536 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
537 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
538
539 int m_ahead = m_A - i - 1;
540 int n_ahead = m_A - i - 1;
541 int m_behind = i;
542 int n_behind = i;
543
544 /*------------------------------------------------------------*/
545
546 if ( m_ahead > 0 )
547 {
548 // FLA_Househ2_UT( FLA_LEFT,
549 // a21_t,
550 // a21_b, tau11 );
552 a21_t,
553 a21_b, rs_A,
554 tau11 );
555
556 // FLA_Set( FLA_ONE, inv_tau11 );
557 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
558 // FLA_Copy( inv_tau11, minus_inv_tau11 );
559 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
562
563 // FLA_Copy( a21_t, first_elem );
564 // FLA_Set( FLA_ONE, a21_t );
565 first_elem = *a21_t;
566 *a21_t = *buff_1;
567
568 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
569 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
571 n_ahead,
572 A22, rs_A, cs_A,
573 a21, rs_A,
574 y2, inc_y,
575 z2, inc_z );
576
577 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
578 // FLA_Inv_scal( FLA_TWO, beta );
579 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
581 m_ahead,
582 a21, rs_A,
583 z2, inc_z,
584 &beta );
587
588 // FLA_Scal( minus_inv_tau11, conj_beta );
589 // FLA_Axpy( conj_beta, a21, y2 );
590 // FLA_Scal( inv_tau11, y2 );
593 m_ahead,
594 &conj_beta,
595 a21, rs_A,
596 y2, inc_y );
598 m_ahead,
599 &inv_tau11,
600 y2, inc_y );
601
602 // FLA_Scal( minus_inv_tau11, beta );
603 // FLA_Axpy( beta, a21, z2 );
604 // FLA_Scal( inv_tau11, z2 );
607 m_ahead,
608 &beta,
609 a21, rs_A,
610 z2, inc_z );
612 m_ahead,
613 &inv_tau11,
614 z2, inc_z );
615
616 // FLA_Dot( a12t, a21, dot_product );
617 // FLA_Scal( minus_inv_tau11, dot_product );
618 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
620 m_ahead,
621 a12t, cs_A,
622 a21, rs_A,
623 &dot_product );
626 m_ahead,
628 a21, rs_A,
629 a12t, cs_A );
630
631 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
632 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
635 m_behind,
636 n_ahead,
637 buff_1,
638 A02, rs_A, cs_A,
639 a21, rs_A,
640 buff_0,
641 y0, inc_y );
644 m_behind,
645 n_ahead,
647 y0, inc_y,
648 a21, rs_A,
649 A02, rs_A, cs_A );
650
651 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
652 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
654 n_ahead,
655 buff_m1,
656 a21, rs_A,
657 y2, inc_y,
658 z2, inc_z,
659 a21, rs_A,
660 A22, rs_A, cs_A );
661
662 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
665 m_ahead,
666 n_behind,
667 buff_1,
668 A20, rs_A, cs_A,
669 a21, rs_A,
670 buff_0,
671 t01, rs_T );
672
673 // FLA_Copy( first_elem, a21_t );
674 *a21_t = first_elem;
675 }
676
677 /*------------------------------------------------------------*/
678
679 }
680
681 // FLA_Obj_free( &y );
682 // FLA_Obj_free( &z );
683 FLA_free( buff_y );
684 FLA_free( buff_z );
685
686 return FLA_SUCCESS;
687}
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:241
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofc_var3()

FLA_Error FLA_Hess_UT_step_ofc_var3 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
753{
758
767 int i;
768
769 // b_alg = FLA_Obj_length( T );
770 int b_alg = m_T;
771
772 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
773 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
774 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
775 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
776 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
777 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
778 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
779 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
780 scomplex* buff_v = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
781 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
782 int inc_u = 1;
783 int inc_y = 1;
784 int inc_z = 1;
785 int inc_v = 1;
786 int inc_w = 1;
787
788 for ( i = 0; i < b_alg; ++i )
789 {
790 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
791 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
792 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
793 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
794 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
795 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
796
797 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
798 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
799
801 scomplex* u2 = buff_u + (i+1)*inc_u;
802
803 scomplex* y0 = buff_y + (0 )*inc_y;
804 scomplex* psi1 = buff_y + (i )*inc_y;
805 scomplex* y2 = buff_y + (i+1)*inc_y;
806
807 scomplex* zeta1 = buff_z + (i )*inc_z;
808 scomplex* z2 = buff_z + (i+1)*inc_z;
809
810 scomplex* v2 = buff_v + (i+1)*inc_v;
811
812 scomplex* w2 = buff_w + (i+1)*inc_w;
813
814 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
815 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
816
817 int m_ahead = m_A - i - 1;
818 int n_ahead = m_A - i - 1;
819 int m_behind = i;
820 int n_behind = i;
821
822 /*------------------------------------------------------------*/
823
824 if ( m_behind > 0 )
825 {
826 // FLA_Copy( upsilon1, minus_upsilon1 );
827 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
828 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
831
832 // FLA_Copy( psi1, minus_psi1 );
833 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
834 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
837
838 // FLA_Copy( zeta1, minus_zeta1 );
839 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
841
842 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
843 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
845 1,
847 psi1, 1,
848 alpha11, 1 );
850 1,
852 upsilon1, 1,
853 alpha11, 1 );
854
855 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
856 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
858 m_ahead,
860 y2, inc_y,
861 a12t, cs_A );
863 m_ahead,
865 u2, inc_u,
866 a12t, cs_A );
867
868 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
869 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
871 m_ahead,
873 u2, inc_u,
874 a21, rs_A );
876 m_ahead,
878 z2, inc_z,
879 a21, rs_A );
880 }
881
882 if ( m_ahead > 0 )
883 {
884 // FLA_Househ2_UT( FLA_LEFT,
885 // a21_t,
886 // a21_b, tau11 );
888 a21_t,
889 a21_b, rs_A,
890 tau11 );
891
892 // FLA_Set( FLA_ONE, inv_tau11 );
893 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
894 // FLA_Copy( inv_tau11, minus_inv_tau11 );
895 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
898
899 // FLA_Copy( a21_t, first_elem );
900 // FLA_Set( FLA_ONE, a21_t );
901 first_elem = *a21_t;
902 *a21_t = *buff_1;
903 }
904
905 if ( m_behind > 0 && m_ahead > 0 )
906 {
907 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
908 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
909 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
910 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
912 n_ahead,
913 buff_m1,
914 u2, inc_u,
915 y2, inc_y,
916 z2, inc_z,
917 A22, rs_A, cs_A,
918 a21, rs_A,
919 v2, inc_v,
920 w2, inc_w );
921 }
922 else if ( m_ahead > 0 )
923 {
924 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
925 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
927 n_ahead,
928 A22, rs_A, cs_A,
929 a21, rs_A,
930 v2, inc_v,
931 w2, inc_w );
932 }
933
934 if ( m_ahead > 0 )
935 {
936 // FLA_Copy( a21, u2 );
937 // FLA_Copy( v2, y2 );
938 // FLA_Copy( w2, z2 );
940 m_ahead,
941 a21, rs_A,
942 u2, inc_u );
944 m_ahead,
945 v2, inc_v,
946 y2, inc_y );
948 m_ahead,
949 w2, inc_w,
950 z2, inc_z );
951
952 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
953 // FLA_Inv_scal( FLA_TWO, beta );
954 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
956 m_ahead,
957 a21, rs_A,
958 z2, inc_z,
959 &beta );
962
963 // FLA_Scal( minus_inv_tau11, conj_beta );
964 // FLA_Axpy( conj_beta, a21, y2 );
965 // FLA_Scal( inv_tau11, y2 );
968 m_ahead,
969 &conj_beta,
970 a21, rs_A,
971 y2, inc_y );
973 m_ahead,
974 &inv_tau11,
975 y2, inc_y );
976
977 // FLA_Scal( minus_inv_tau11, beta );
978 // FLA_Axpy( beta, a21, z2 );
979 // FLA_Scal( inv_tau11, z2 );
982 m_ahead,
983 &beta,
984 a21, rs_A,
985 z2, inc_z );
987 m_ahead,
988 &inv_tau11,
989 z2, inc_z );
990
991 // FLA_Dot( a12t, a21, dot_product );
992 // FLA_Scal( minus_inv_tau11, dot_product );
993 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
995 m_ahead,
996 a12t, cs_A,
997 a21, rs_A,
998 &dot_product );
1001 m_ahead,
1002 &dot_product,
1003 a21, rs_A,
1004 a12t, cs_A );
1005
1006 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1007 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1010 m_behind,
1011 n_ahead,
1012 buff_1,
1013 A02, rs_A, cs_A,
1014 a21, rs_A,
1015 buff_0,
1016 y0, inc_y );
1019 m_behind,
1020 n_ahead,
1022 y0, inc_y,
1023 a21, rs_A,
1024 A02, rs_A, cs_A );
1025
1026 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1029 m_ahead,
1030 n_behind,
1031 buff_1,
1032 A20, rs_A, cs_A,
1033 a21, rs_A,
1034 buff_0,
1035 t01, rs_T );
1036
1037 // FLA_Copy( first_elem, a21_t );
1038 *a21_t = first_elem;
1039 }
1040
1041 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1042 {
1043 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1044 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1046 n_ahead,
1047 buff_m1,
1048 u2, inc_u,
1049 y2, inc_y,
1050 z2, inc_z,
1051 u2, inc_u,
1052 A22, rs_A, cs_A );
1053 }
1054
1055 /*------------------------------------------------------------*/
1056
1057 }
1058
1059 // FLA_Obj_free( &u );
1060 // FLA_Obj_free( &y );
1061 // FLA_Obj_free( &z );
1062 // FLA_Obj_free( &v );
1063 // FLA_Obj_free( &w );
1064 FLA_free( buff_u );
1065 FLA_free( buff_y );
1066 FLA_free( buff_z );
1067 FLA_free( buff_v );
1068 FLA_free( buff_w );
1069
1070 return FLA_SUCCESS;
1071}
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:327
double *restrict zeta1
Definition bl1_axmyv2.c:142
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofc_var4()

FLA_Error FLA_Hess_UT_step_ofc_var4 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
688{
693
699 int i;
700
701 // b_alg = FLA_Obj_length( T );
702 int b_alg = m_T;
703
704 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
705 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
706 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
707 scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
708 int inc_e = 1;
709
710 // FLA_Set( FLA_ZERO, Y );
711 // FLA_Set( FLA_ZERO, Z );
712 bl1_csetm( m_A,
713 b_alg,
714 buff_0,
715 buff_Y, rs_Y, cs_Y );
716 bl1_csetm( m_A,
717 b_alg,
718 buff_0,
719 buff_Z, rs_Z, cs_Z );
720
721 for ( i = 0; i < b_alg; ++i )
722 {
723 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
724 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
725 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
726 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
727 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
728 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
729 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
730
731 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
732 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
733 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
734
735 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
736 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
737 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
738
739 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
740 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
741
742 scomplex* e0 = buff_e + (0 )*inc_e;
743
744 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
745
746 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
747 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
748
749 scomplex* ABL = a10t;
750 scomplex* ZBL = z10t;
751
753
754 int m_ahead = m_A - i - 1;
755 int n_ahead = m_A - i - 1;
756 int m_behind = i;
757 int n_behind = i;
758
759 /*------------------------------------------------------------*/
760
761 if ( m_behind > 0 )
762 {
763 // FLA_Copy( a10t_r, last_elem );
764 // FLA_Set( FLA_ONE, a10t_r );
765 last_elem = *a10t_r;
766 *a10t_r = *buff_1;
767 }
768
769 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
770 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
773 m_ahead + 1,
774 n_behind,
775 buff_m1,
776 ABL, rs_A, cs_A,
777 y10t, cs_Y,
778 buff_1,
779 a2, rs_A );
782 m_ahead + 1,
783 n_behind,
784 buff_m1,
785 ZBL, rs_Z, cs_Z,
786 a10t, cs_A,
787 buff_1,
788 a2, rs_A );
789
790 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
791 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
794 m_ahead,
795 n_behind,
796 buff_m1,
797 Y20, rs_Y, cs_Y,
798 a10t, cs_A,
799 buff_1,
800 a12t, cs_A );
803 m_ahead,
804 n_behind,
805 buff_m1,
806 A20, rs_A, cs_A,
807 z10t, cs_Z,
808 buff_1,
809 a12t, cs_A );
810
811 if ( m_behind > 0 )
812 {
813 // FLA_Copy( last_elem, a10t_r );
814 *a10t_r = last_elem;
815 }
816
817 if ( m_ahead > 0 )
818 {
819 // FLA_Househ2_UT( FLA_LEFT,
820 // a21_t,
821 // a21_b, tau11 );
823 a21_t,
824 a21_b, rs_A,
825 tau11 );
826
827 // FLA_Set( FLA_ONE, inv_tau11 );
828 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
829 // FLA_Copy( inv_tau11, minus_inv_tau11 );
830 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
833
834 // FLA_Copy( a21_t, first_elem );
835 // FLA_Set( FLA_ONE, a21_t );
836 first_elem = *a21_t;
837 *a21_t = *buff_1;
838
839 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
840 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
842 n_ahead,
843 A22, rs_A, cs_A,
844 a21, rs_A,
845 y21, rs_Y,
846 z21, rs_Z );
847
848 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
849 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
850 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
851 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
852 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
853 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
854 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
855 // FLA_Copy( d0, t01 );
857 n_behind,
858 buff_m1,
859 A20, rs_A, cs_A,
860 Y20, rs_Y, cs_Y,
861 Z20, rs_Z, cs_Z,
862 t01, rs_T,
863 a21, rs_A,
864 y21, rs_Y,
865 z21, rs_Z );
866
867 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
868 // FLA_Inv_scal( FLA_TWO, beta );
869 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
871 m_ahead,
872 a21, rs_A,
873 z21, rs_Z,
874 &beta );
877
878 // FLA_Scal( minus_inv_tau11, conj_beta );
879 // FLA_Axpy( conj_beta, a21, y21 );
880 // FLA_Scal( inv_tau11, y21 );
883 m_ahead,
884 &conj_beta,
885 a21, rs_A,
886 y21, rs_Y );
888 m_ahead,
889 &inv_tau11,
890 y21, rs_Y );
891
892 // FLA_Scal( minus_inv_tau11, beta );
893 // FLA_Axpy( beta, a21, z21 );
894 // FLA_Scal( inv_tau11, z21 );
897 m_ahead,
898 &beta,
899 a21, rs_A,
900 z21, rs_Z );
902 m_ahead,
903 &inv_tau11,
904 z21, rs_Z );
905
906 // FLA_Dot( a12t, a21, dot_product );
907 // FLA_Scal( minus_inv_tau11, dot_product );
908 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
910 m_ahead,
911 a12t, cs_A,
912 a21, rs_A,
913 &dot_product );
916 m_ahead,
918 a21, rs_A,
919 a12t, cs_A );
920
921 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
922 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
925 m_behind,
926 n_ahead,
927 buff_1,
928 A02, rs_A, cs_A,
929 a21, rs_A,
930 buff_0,
931 e0, inc_e );
934 m_behind,
935 n_ahead,
937 e0, inc_e,
938 a21, rs_A,
939 A02, rs_A, cs_A );
940
941 // FLA_Copy( first_elem, a21_t );
942 *a21_t = first_elem;
943 }
944
945 /*------------------------------------------------------------*/
946
947 }
948
949 // FLA_Obj_free( &e );
950 FLA_free( buff_e );
951
952 return FLA_SUCCESS;
953}
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:398
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofd_var1()

FLA_Error FLA_Hess_UT_step_ofd_var1 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofd_var2()

FLA_Error FLA_Hess_UT_step_ofd_var2 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
299{
300 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
301 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
302 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
304
305 double first_elem;
306 double dot_product;
307 double beta, conj_beta;
308 double inv_tau11;
309 double minus_inv_tau11;
310 int i;
311
312 // b_alg = FLA_Obj_length( T );
313 int b_alg = m_T;
314
315 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
316 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
317 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
318 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
319 int inc_y = 1;
320 int inc_z = 1;
321
322 for ( i = 0; i < b_alg; ++i )
323 {
324 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
325 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
326 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
327 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
328 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
329
330 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
331 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
332
333 double* y0 = buff_y + (0 )*inc_y;
334 double* y2 = buff_y + (i+1)*inc_y;
335
336 double* z2 = buff_z + (i+1)*inc_z;
337
338 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
339 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
340
341 int m_ahead = m_A - i - 1;
342 int n_ahead = m_A - i - 1;
343 int m_behind = i;
344 int n_behind = i;
345
346 /*------------------------------------------------------------*/
347
348 if ( m_ahead > 0 )
349 {
350 // FLA_Househ2_UT( FLA_LEFT,
351 // a21_t,
352 // a21_b, tau11 );
354 a21_t,
355 a21_b, rs_A,
356 tau11 );
357
358 // FLA_Set( FLA_ONE, inv_tau11 );
359 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
360 // FLA_Copy( inv_tau11, minus_inv_tau11 );
361 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
364
365 // FLA_Copy( a21_t, first_elem );
366 // FLA_Set( FLA_ONE, a21_t );
367 first_elem = *a21_t;
368 *a21_t = *buff_1;
369
370 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
371 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
373 n_ahead,
374 A22, rs_A, cs_A,
375 a21, rs_A,
376 y2, inc_y,
377 z2, inc_z );
378
379 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
380 // FLA_Inv_scal( FLA_TWO, beta );
381 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
383 m_ahead,
384 a21, rs_A,
385 z2, inc_z,
386 &beta );
389
390 // FLA_Scal( minus_inv_tau11, conj_beta );
391 // FLA_Axpy( conj_beta, a21, y2 );
392 // FLA_Scal( inv_tau11, y2 );
395 m_ahead,
396 &conj_beta,
397 a21, rs_A,
398 y2, inc_y );
400 m_ahead,
401 &inv_tau11,
402 y2, inc_y );
403
404 // FLA_Scal( minus_inv_tau11, beta );
405 // FLA_Axpy( beta, a21, z2 );
406 // FLA_Scal( inv_tau11, z2 );
409 m_ahead,
410 &beta,
411 a21, rs_A,
412 z2, inc_z );
414 m_ahead,
415 &inv_tau11,
416 z2, inc_z );
417
418 // FLA_Dot( a12t, a21, dot_product );
419 // FLA_Scal( minus_inv_tau11, dot_product );
420 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
422 m_ahead,
423 a12t, cs_A,
424 a21, rs_A,
425 &dot_product );
428 m_ahead,
430 a21, rs_A,
431 a12t, cs_A );
432
433 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
434 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
437 m_behind,
438 n_ahead,
439 buff_1,
440 A02, rs_A, cs_A,
441 a21, rs_A,
442 buff_0,
443 y0, inc_y );
446 m_behind,
447 n_ahead,
449 y0, inc_y,
450 a21, rs_A,
451 A02, rs_A, cs_A );
452
453 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
454 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
456 n_ahead,
457 buff_m1,
458 a21, rs_A,
459 y2, inc_y,
460 z2, inc_z,
461 a21, rs_A,
462 A22, rs_A, cs_A );
463
464 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
467 m_ahead,
468 n_behind,
469 buff_1,
470 A20, rs_A, cs_A,
471 a21, rs_A,
472 buff_0,
473 t01, rs_T );
474
475 // FLA_Copy( first_elem, a21_t );
476 *a21_t = first_elem;
477 }
478
479 /*------------------------------------------------------------*/
480
481 }
482
483 // FLA_Obj_free( &y );
484 // FLA_Obj_free( &z );
485 FLA_free( buff_y );
486 FLA_free( buff_z );
487
488 return FLA_SUCCESS;
489}
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofd_var3()

FLA_Error FLA_Hess_UT_step_ofd_var3 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double buff_T,
int  rs_T,
int  cs_T 
)
// Documentation listing of the body of FLA_Hess_UT_step_ofd_var3 (double
// precision); every line is prefixed with its line number in the original
// source file. The loop runs b_alg = m_T iterations; iteration i addresses
// column i of A (alpha11, a21) and column i of T (t01, tau11) and uses five
// scratch vectors u, y, z, v, w of m_A elements each.
// NOTE(review): the listing line numbers are not contiguous (e.g. 431,
// 438-439, 503-504, 518 are absent), so the heads of most multi-line calls
// and the declaration of buff_m1 are not visible here. The commented FLAME
// calls above each argument list indicate the intended operation; confirm
// against the original source before relying on this listing.
427{
428 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
429 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
430 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
432
433 double first_elem;
434 double dot_product;
435 double beta, conj_beta;
436 double inv_tau11;
437 double minus_inv_tau11;
440 double minus_zeta1;
441 int i;
442
443 // b_alg = FLA_Obj_length( T );
444 int b_alg = m_T;
445
446 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
447 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
448 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
449 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
450 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
// Five scratch vectors of m_A elements (element size taken from *buff_A),
// all accessed with unit stride.
// NOTE(review): no NULL check on the FLA_malloc results is visible here.
451 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
452 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
453 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
454 double* buff_v = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
455 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
456 int inc_u = 1;
457 int inc_y = 1;
458 int inc_z = 1;
459 int inc_v = 1;
460 int inc_w = 1;
461
462 for ( i = 0; i < b_alg; ++i )
463 {
// Views for iteration i: pointers into A, T and the scratch vectors at
// row/column offsets 0, i and i+1.
464 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
465 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
466 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
467 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
468 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
469 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
470
471 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
472 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
473
474 double* upsilon1 = buff_u + (i )*inc_u;
475 double* u2 = buff_u + (i+1)*inc_u;
476
477 double* y0 = buff_y + (0 )*inc_y;
478 double* psi1 = buff_y + (i )*inc_y;
479 double* y2 = buff_y + (i+1)*inc_y;
480
481 double* zeta1 = buff_z + (i )*inc_z;
482 double* z2 = buff_z + (i+1)*inc_z;
483
484 double* v2 = buff_v + (i+1)*inc_v;
485
486 double* w2 = buff_w + (i+1)*inc_w;
487
// Top element of a21 and the remainder below it (one row stride further).
488 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
489 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
490
// Counts of rows/columns after index i (ahead) and before it (behind).
491 int m_ahead = m_A - i - 1;
492 int n_ahead = m_A - i - 1;
493 int m_behind = i;
494 int n_behind = i;
495
496 /*------------------------------------------------------------*/
497
// Skipped on the first iteration (m_behind == i == 0). Per the commented
// FLAME calls, this block updates alpha11, a12t and a21 using elements of
// u, y and z; the call heads are missing from the listing.
498 if ( m_behind > 0 )
499 {
500 // FLA_Copy( upsilon1, minus_upsilon1 );
501 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
502 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
505
506 // FLA_Copy( psi1, minus_psi1 );
507 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
508 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
511
512 // FLA_Copy( zeta1, minus_zeta1 );
513 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
515
516 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
517 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
519 1,
521 psi1, 1,
522 alpha11, 1 );
524 1,
526 upsilon1, 1,
527 alpha11, 1 );
528
529 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
530 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
532 m_ahead,
534 y2, inc_y,
535 a12t, cs_A );
537 m_ahead,
539 u2, inc_u,
540 a12t, cs_A );
541
542 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
543 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
545 m_ahead,
547 u2, inc_u,
548 a21, rs_A );
550 m_ahead,
552 z2, inc_z,
553 a21, rs_A );
554 }
555
556 if ( m_ahead > 0 )
557 {
558 // FLA_Househ2_UT( FLA_LEFT,
559 // a21_t,
560 // a21_b, tau11 );
562 a21_t,
563 a21_b, rs_A,
564 tau11 );
565
566 // FLA_Set( FLA_ONE, inv_tau11 );
567 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
568 // FLA_Copy( inv_tau11, minus_inv_tau11 );
569 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
572
573 // FLA_Copy( a21_t, first_elem );
574 // FLA_Set( FLA_ONE, a21_t );
// Save the top element of a21 and overwrite it with the value behind
// buff_1 (FLA_ONE); it is written back at listing line 712.
575 first_elem = *a21_t;
576 *a21_t = *buff_1;
577 }
578
579 if ( m_behind > 0 && m_ahead > 0 )
580 {
581 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
582 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
583 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
584 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
586 n_ahead,
587 buff_m1,
588 u2, inc_u,
589 y2, inc_y,
590 z2, inc_z,
591 A22, rs_A, cs_A,
592 a21, rs_A,
593 v2, inc_v,
594 w2, inc_w );
595 }
// First iteration only (m_behind == 0): the argument list below carries no
// u2/y2/z2 operands, only A22, a21, v2 and w2.
596 else if ( m_ahead > 0 )
597 {
598 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
599 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
601 n_ahead,
602 A22, rs_A, cs_A,
603 a21, rs_A,
604 v2, inc_v,
605 w2, inc_w );
606 }
607
608 if ( m_ahead > 0 )
609 {
610 // FLA_Copy( a21, u2 );
611 // FLA_Copy( v2, y2 );
612 // FLA_Copy( w2, z2 );
614 m_ahead,
615 a21, rs_A,
616 u2, inc_u );
618 m_ahead,
619 v2, inc_v,
620 y2, inc_y );
622 m_ahead,
623 w2, inc_w,
624 z2, inc_z );
625
626 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
627 // FLA_Inv_scal( FLA_TWO, beta );
628 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
630 m_ahead,
631 a21, rs_A,
632 z2, inc_z,
633 &beta );
636
637 // FLA_Scal( minus_inv_tau11, conj_beta );
638 // FLA_Axpy( conj_beta, a21, y2 );
639 // FLA_Scal( inv_tau11, y2 );
642 m_ahead,
643 &conj_beta,
644 a21, rs_A,
645 y2, inc_y );
647 m_ahead,
648 &inv_tau11,
649 y2, inc_y );
650
651 // FLA_Scal( minus_inv_tau11, beta );
652 // FLA_Axpy( beta, a21, z2 );
653 // FLA_Scal( inv_tau11, z2 );
656 m_ahead,
657 &beta,
658 a21, rs_A,
659 z2, inc_z );
661 m_ahead,
662 &inv_tau11,
663 z2, inc_z );
664
665 // FLA_Dot( a12t, a21, dot_product );
666 // FLA_Scal( minus_inv_tau11, dot_product );
667 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
669 m_ahead,
670 a12t, cs_A,
671 a21, rs_A,
672 &dot_product );
675 m_ahead,
677 a21, rs_A,
678 a12t, cs_A );
679
680 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
681 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
684 m_behind,
685 n_ahead,
686 buff_1,
687 A02, rs_A, cs_A,
688 a21, rs_A,
689 buff_0,
690 y0, inc_y );
693 m_behind,
694 n_ahead,
696 y0, inc_y,
697 a21, rs_A,
698 A02, rs_A, cs_A );
699
700 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
703 m_ahead,
704 n_behind,
705 buff_1,
706 A20, rs_A, cs_A,
707 a21, rs_A,
708 buff_0,
709 t01, rs_T );
710
711 // FLA_Copy( first_elem, a21_t );
// Put back the element that was replaced at listing line 576.
712 *a21_t = first_elem;
713 }
714
// True only on the final loop iteration (i == b_alg - 1). Per the commented
// FLAME calls, this applies the two pending rank-1 updates to A22.
715 if ( m_behind + 1 == b_alg && m_ahead > 0 )
716 {
717 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
718 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
720 n_ahead,
721 buff_m1,
722 u2, inc_u,
723 y2, inc_y,
724 z2, inc_z,
725 u2, inc_u,
726 A22, rs_A, cs_A );
727 }
728
729 /*------------------------------------------------------------*/
730
731 }
732
733 // FLA_Obj_free( &u );
734 // FLA_Obj_free( &y );
735 // FLA_Obj_free( &z );
736 // FLA_Obj_free( &v );
737 // FLA_Obj_free( &w );
// Release all five scratch vectors allocated on entry.
738 FLA_free( buff_u );
739 FLA_free( buff_y );
740 FLA_free( buff_z );
741 FLA_free( buff_v );
742 FLA_free( buff_w );
743
744 return FLA_SUCCESS;
745}
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:248
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofd_var4()

FLA_Error FLA_Hess_UT_step_ofd_var4 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_T,
int  rs_T,
int  cs_T 
)
// Documentation listing of the body of FLA_Hess_UT_step_ofd_var4 (double
// precision); every line is prefixed with its line number in the original
// source file. Y and Z are zeroed on entry, then the loop runs b_alg = m_T
// iterations; iteration i addresses column i of A, Y, Z and T.
// NOTE(review): the listing line numbers are not contiguous (e.g. 417,
// 496-497, 547, 556-557 are absent), so the heads of most multi-line calls
// and the declaration of buff_m1 are not visible here. The commented FLAME
// calls above each argument list indicate the intended operation; confirm
// against the original source before relying on this listing.
413{
414 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
415 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
416 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
418
419 double first_elem, last_elem;
420 double dot_product;
421 double beta, conj_beta;
422 double inv_tau11;
423 double minus_inv_tau11;
424 int i;
425
426 // b_alg = FLA_Obj_length( T );
427 int b_alg = m_T;
428
429 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
430 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
431 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
// Only the e workspace (m_A elements, unit stride) is allocated in this
// listing; no buffers for d or f appear.
// NOTE(review): no NULL check on the FLA_malloc result is visible here.
432 double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
433 int inc_e = 1;
434
435 // FLA_Set( FLA_ZERO, Y );
436 // FLA_Set( FLA_ZERO, Z );
// Fill the m_A x b_alg panels Y and Z with the value behind buff_0 (FLA_ZERO).
437 bl1_dsetm( m_A,
438 b_alg,
439 buff_0,
440 buff_Y, rs_Y, cs_Y );
441 bl1_dsetm( m_A,
442 b_alg,
443 buff_0,
444 buff_Z, rs_Z, cs_Z );
445
446 for ( i = 0; i < b_alg; ++i )
447 {
// Views for iteration i: pointers into A, Y, Z and T at row/column
// offsets 0, i and i+1.
448 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
449 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
450 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
451 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
452 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
453 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
454 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
455
456 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
457 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
458 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
459
460 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
461 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
462 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
463
464 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
465 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
466
467 double* e0 = buff_e + (0 )*inc_e;
468
// Element of row i at column i-1. For i == 0 this address lies one column
// before a10t; it is only dereferenced under m_behind > 0 below.
469 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
470
// Top element of a21 and the remainder below it (one row stride further).
471 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
472 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
473
// Aliases: ABL and ZBL start at row i of A and Z; a2 starts at alpha11.
474 double* ABL = a10t;
475 double* ZBL = z10t;
476
477 double* a2 = alpha11;
478
// Counts of rows/columns after index i (ahead) and before it (behind).
479 int m_ahead = m_A - i - 1;
480 int n_ahead = m_A - i - 1;
481 int m_behind = i;
482 int n_behind = i;
483
484 /*------------------------------------------------------------*/
485
486 if ( m_behind > 0 )
487 {
488 // FLA_Copy( a10t_r, last_elem );
489 // FLA_Set( FLA_ONE, a10t_r );
// Save the element and overwrite it with the value behind buff_1
// (FLA_ONE); it is written back at listing line 539.
490 last_elem = *a10t_r;
491 *a10t_r = *buff_1;
492 }
493
494 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
495 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
498 m_ahead + 1,
499 n_behind,
500 buff_m1,
501 ABL, rs_A, cs_A,
502 y10t, cs_Y,
503 buff_1,
504 a2, rs_A );
507 m_ahead + 1,
508 n_behind,
509 buff_m1,
510 ZBL, rs_Z, cs_Z,
511 a10t, cs_A,
512 buff_1,
513 a2, rs_A );
514
515 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
516 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
519 m_ahead,
520 n_behind,
521 buff_m1,
522 Y20, rs_Y, cs_Y,
523 a10t, cs_A,
524 buff_1,
525 a12t, cs_A );
528 m_ahead,
529 n_behind,
530 buff_m1,
531 A20, rs_A, cs_A,
532 z10t, cs_Z,
533 buff_1,
534 a12t, cs_A );
535
536 if ( m_behind > 0 )
537 {
538 // FLA_Copy( last_elem, a10t_r );
539 *a10t_r = last_elem;
540 }
541
542 if ( m_ahead > 0 )
543 {
544 // FLA_Househ2_UT( FLA_LEFT,
545 // a21_t,
546 // a21_b, tau11 );
548 a21_t,
549 a21_b, rs_A,
550 tau11 );
551
552 // FLA_Set( FLA_ONE, inv_tau11 );
553 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
554 // FLA_Copy( inv_tau11, minus_inv_tau11 );
555 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
558
559 // FLA_Copy( a21_t, first_elem );
560 // FLA_Set( FLA_ONE, a21_t );
// Save the top element of a21 and overwrite it with the value behind
// buff_1 (FLA_ONE); it is written back at listing line 667.
561 first_elem = *a21_t;
562 *a21_t = *buff_1;
563
564 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
565 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
567 n_ahead,
568 A22, rs_A, cs_A,
569 a21, rs_A,
570 y21, rs_Y,
571 z21, rs_Z );
572
573 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
574 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
575 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
576 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
577 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
578 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
579 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
580 // FLA_Copy( d0, t01 );
582 n_behind,
583 buff_m1,
584 A20, rs_A, cs_A,
585 Y20, rs_Y, cs_Y,
586 Z20, rs_Z, cs_Z,
587 t01, rs_T,
588 a21, rs_A,
589 y21, rs_Y,
590 z21, rs_Z );
591
592 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
593 // FLA_Inv_scal( FLA_TWO, beta );
594 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
596 m_ahead,
597 a21, rs_A,
598 z21, rs_Z,
599 &beta );
602
603 // FLA_Scal( minus_inv_tau11, conj_beta );
604 // FLA_Axpy( conj_beta, a21, y21 );
605 // FLA_Scal( inv_tau11, y21 );
608 m_ahead,
609 &conj_beta,
610 a21, rs_A,
611 y21, rs_Y );
613 m_ahead,
614 &inv_tau11,
615 y21, rs_Y );
616
617 // FLA_Scal( minus_inv_tau11, beta );
618 // FLA_Axpy( beta, a21, z21 );
619 // FLA_Scal( inv_tau11, z21 );
622 m_ahead,
623 &beta,
624 a21, rs_A,
625 z21, rs_Z );
627 m_ahead,
628 &inv_tau11,
629 z21, rs_Z );
630
631 // FLA_Dot( a12t, a21, dot_product );
632 // FLA_Scal( minus_inv_tau11, dot_product );
633 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
635 m_ahead,
636 a12t, cs_A,
637 a21, rs_A,
638 &dot_product );
641 m_ahead,
643 a21, rs_A,
644 a12t, cs_A );
645
646 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
647 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
650 m_behind,
651 n_ahead,
652 buff_1,
653 A02, rs_A, cs_A,
654 a21, rs_A,
655 buff_0,
656 e0, inc_e );
659 m_behind,
660 n_ahead,
662 e0, inc_e,
663 a21, rs_A,
664 A02, rs_A, cs_A );
665
666 // FLA_Copy( first_elem, a21_t );
// Put back the element that was replaced at listing line 562.
667 *a21_t = first_elem;
668 }
669
670 /*------------------------------------------------------------*/
671
672 }
673
674 // FLA_Obj_free( &e );
// Release the e workspace allocated on entry.
675 FLA_free( buff_e );
676
677 return FLA_SUCCESS;
678}
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:270
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofs_var1()

FLA_Error FLA_Hess_UT_step_ofs_var1 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofs_var2()

FLA_Error FLA_Hess_UT_step_ofs_var2 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
// Documentation listing of the body of FLA_Hess_UT_step_ofs_var2 (single
// precision); every line is prefixed with its line number in the original
// source file. The loop runs b_alg = m_T iterations; iteration i addresses
// column i of A (a21) and column i of T (t01, tau11) and uses two scratch
// vectors y and z of m_A elements each.
// NOTE(review): the listing line numbers are not contiguous (e.g. 105, 155,
// 164-165, 174 are absent), so the heads of most multi-line calls and the
// declaration of buff_m1 are not visible here. The commented FLAME calls
// above each argument list indicate the intended operation; confirm against
// the original source before relying on this listing.
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
112 int i;
113
114 // b_alg = FLA_Obj_length( T );
115 int b_alg = m_T;
116
117 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
// Two scratch vectors of m_A elements (element size taken from *buff_A),
// both accessed with unit stride.
// NOTE(review): no NULL check on the FLA_malloc results is visible here.
119 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121 int inc_y = 1;
122 int inc_z = 1;
123
124 for ( i = 0; i < b_alg; ++i )
125 {
// Views for iteration i: pointers into A, T and the scratch vectors at
// row/column offsets 0, i and i+1.
126 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131
132 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134
135 float* y0 = buff_y + (0 )*inc_y;
136 float* y2 = buff_y + (i+1)*inc_y;
137
138 float* z2 = buff_z + (i+1)*inc_z;
139
// Top element of a21 and the remainder below it (one row stride further).
140 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142
// Counts of rows/columns after index i (ahead) and before it (behind).
143 int m_ahead = m_A - i - 1;
144 int n_ahead = m_A - i - 1;
145 int m_behind = i;
146 int n_behind = i;
147
148 /*------------------------------------------------------------*/
149
// The whole iteration is a no-op when nothing lies after index i.
150 if ( m_ahead > 0 )
151 {
152 // FLA_Househ2_UT( FLA_LEFT,
153 // a21_t,
154 // a21_b, tau11 );
156 a21_t,
157 a21_b, rs_A,
158 tau11 );
159
160 // FLA_Set( FLA_ONE, inv_tau11 );
161 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162 // FLA_Copy( inv_tau11, minus_inv_tau11 );
163 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
166
167 // FLA_Copy( a21_t, first_elem );
168 // FLA_Set( FLA_ONE, a21_t );
// Save the top element of a21 and overwrite it with the value behind
// buff_1 (FLA_ONE); it is written back at listing line 278.
169 first_elem = *a21_t;
170 *a21_t = *buff_1;
171
172 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
173 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
175 n_ahead,
176 A22, rs_A, cs_A,
177 a21, rs_A,
178 y2, inc_y,
179 z2, inc_z );
180
181 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
182 // FLA_Inv_scal( FLA_TWO, beta );
183 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
185 m_ahead,
186 a21, rs_A,
187 z2, inc_z,
188 &beta );
191
192 // FLA_Scal( minus_inv_tau11, conj_beta );
193 // FLA_Axpy( conj_beta, a21, y2 );
194 // FLA_Scal( inv_tau11, y2 );
197 m_ahead,
198 &conj_beta,
199 a21, rs_A,
200 y2, inc_y );
202 m_ahead,
203 &inv_tau11,
204 y2, inc_y );
205
206 // FLA_Scal( minus_inv_tau11, beta );
207 // FLA_Axpy( beta, a21, z2 );
208 // FLA_Scal( inv_tau11, z2 );
211 m_ahead,
212 &beta,
213 a21, rs_A,
214 z2, inc_z );
216 m_ahead,
217 &inv_tau11,
218 z2, inc_z );
219
220 // FLA_Dot( a12t, a21, dot_product );
221 // FLA_Scal( minus_inv_tau11, dot_product );
222 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
224 m_ahead,
225 a12t, cs_A,
226 a21, rs_A,
227 &dot_product );
230 m_ahead,
232 a21, rs_A,
233 a12t, cs_A );
234
235 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
236 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
239 m_behind,
240 n_ahead,
241 buff_1,
242 A02, rs_A, cs_A,
243 a21, rs_A,
244 buff_0,
245 y0, inc_y );
248 m_behind,
249 n_ahead,
251 y0, inc_y,
252 a21, rs_A,
253 A02, rs_A, cs_A );
254
255 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
256 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
258 n_ahead,
259 buff_m1,
260 a21, rs_A,
261 y2, inc_y,
262 z2, inc_z,
263 a21, rs_A,
264 A22, rs_A, cs_A );
265
266 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
269 m_ahead,
270 n_behind,
271 buff_1,
272 A20, rs_A, cs_A,
273 a21, rs_A,
274 buff_0,
275 t01, rs_T );
276
277 // FLA_Copy( first_elem, a21_t );
// Put back the element that was replaced at listing line 170.
278 *a21_t = first_elem;
279 }
280
281 /*------------------------------------------------------------*/
282
283 }
284
285 // FLA_Obj_free( &y );
286 // FLA_Obj_free( &z );
// Release both scratch vectors allocated on entry.
287 FLA_free( buff_y );
288 FLA_free( buff_z );
289
290 return FLA_SUCCESS;
291}
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:116
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofs_var3()

FLA_Error FLA_Hess_UT_step_ofs_var3 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
// Documentation listing of the body of FLA_Hess_UT_step_ofs_var3 (single
// precision); every line is prefixed with its line number in the original
// source file. The loop runs b_alg = m_T iterations; iteration i addresses
// column i of A (alpha11, a21) and column i of T (t01, tau11) and uses five
// scratch vectors u, y, z, v, w of m_A elements each.
// NOTE(review): the listing line numbers are not contiguous (e.g. 105,
// 112-113, 177-178, 192 are absent), so the heads of most multi-line calls
// and the declaration of buff_m1 are not visible here. The commented FLAME
// calls above each argument list indicate the intended operation; confirm
// against the original source before relying on this listing.
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
114 float minus_zeta1;
115 int i;
116
117 // b_alg = FLA_Obj_length( T );
118 int b_alg = m_T;
119
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
121 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
122 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
123 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
124 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
// Five scratch vectors of m_A elements (element size taken from *buff_A),
// all accessed with unit stride.
// NOTE(review): no NULL check on the FLA_malloc results is visible here.
125 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
126 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
127 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
128 float* buff_v = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
129 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
130 int inc_u = 1;
131 int inc_y = 1;
132 int inc_z = 1;
133 int inc_v = 1;
134 int inc_w = 1;
135
136 for ( i = 0; i < b_alg; ++i )
137 {
// Views for iteration i: pointers into A, T and the scratch vectors at
// row/column offsets 0, i and i+1.
138 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
139 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
140 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
141 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
142 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
143 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
144
145 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
146 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
147
148 float* upsilon1 = buff_u + (i )*inc_u;
149 float* u2 = buff_u + (i+1)*inc_u;
150
151 float* y0 = buff_y + (0 )*inc_y;
152 float* psi1 = buff_y + (i )*inc_y;
153 float* y2 = buff_y + (i+1)*inc_y;
154
155 float* zeta1 = buff_z + (i )*inc_z;
156 float* z2 = buff_z + (i+1)*inc_z;
157
158 float* v2 = buff_v + (i+1)*inc_v;
159
160 float* w2 = buff_w + (i+1)*inc_w;
161
// Top element of a21 and the remainder below it (one row stride further).
162 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
163 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
164
// Counts of rows/columns after index i (ahead) and before it (behind).
165 int m_ahead = m_A - i - 1;
166 int n_ahead = m_A - i - 1;
167 int m_behind = i;
168 int n_behind = i;
169
170 /*------------------------------------------------------------*/
171
// Skipped on the first iteration (m_behind == i == 0). Per the commented
// FLAME calls, this block updates alpha11, a12t and a21 using elements of
// u, y and z; the call heads are missing from the listing.
172 if ( m_behind > 0 )
173 {
174 // FLA_Copy( upsilon1, minus_upsilon1 );
175 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
176 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
179
180 // FLA_Copy( psi1, minus_psi1 );
181 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
182 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
185
186 // FLA_Copy( zeta1, minus_zeta1 );
187 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
189
190 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
191 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
193 1,
195 psi1, 1,
196 alpha11, 1 );
198 1,
200 upsilon1, 1,
201 alpha11, 1 );
202
203 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
204 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
206 m_ahead,
208 y2, inc_y,
209 a12t, cs_A );
211 m_ahead,
213 u2, inc_u,
214 a12t, cs_A );
215
216 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
217 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
219 m_ahead,
221 u2, inc_u,
222 a21, rs_A );
224 m_ahead,
226 z2, inc_z,
227 a21, rs_A );
228 }
229
230 if ( m_ahead > 0 )
231 {
232 // FLA_Househ2_UT( FLA_LEFT,
233 // a21_t,
234 // a21_b, tau11 );
236 a21_t,
237 a21_b, rs_A,
238 tau11 );
239
240 // FLA_Set( FLA_ONE, inv_tau11 );
241 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
242 // FLA_Copy( inv_tau11, minus_inv_tau11 );
243 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
246
247 // FLA_Copy( a21_t, first_elem );
248 // FLA_Set( FLA_ONE, a21_t );
// Save the top element of a21 and overwrite it with the value behind
// buff_1 (FLA_ONE); it is written back at listing line 386.
249 first_elem = *a21_t;
250 *a21_t = *buff_1;
251 }
252
253 if ( m_behind > 0 && m_ahead > 0 )
254 {
255 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
256 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
257 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
258 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
260 n_ahead,
261 buff_m1,
262 u2, inc_u,
263 y2, inc_y,
264 z2, inc_z,
265 A22, rs_A, cs_A,
266 a21, rs_A,
267 v2, inc_v,
268 w2, inc_w );
269 }
// First iteration only (m_behind == 0): the argument list below carries no
// u2/y2/z2 operands, only A22, a21, v2 and w2.
270 else if ( m_ahead > 0 )
271 {
272 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
273 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
275 n_ahead,
276 A22, rs_A, cs_A,
277 a21, rs_A,
278 v2, inc_v,
279 w2, inc_w );
280 }
281
282 if ( m_ahead > 0 )
283 {
284 // FLA_Copy( a21, u2 );
285 // FLA_Copy( v2, y2 );
286 // FLA_Copy( w2, z2 );
288 m_ahead,
289 a21, rs_A,
290 u2, inc_u );
292 m_ahead,
293 v2, inc_v,
294 y2, inc_y );
296 m_ahead,
297 w2, inc_w,
298 z2, inc_z );
299
300 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
301 // FLA_Inv_scal( FLA_TWO, beta );
302 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
304 m_ahead,
305 a21, rs_A,
306 z2, inc_z,
307 &beta );
310
311 // FLA_Scal( minus_inv_tau11, conj_beta );
312 // FLA_Axpy( conj_beta, a21, y2 );
313 // FLA_Scal( inv_tau11, y2 );
316 m_ahead,
317 &conj_beta,
318 a21, rs_A,
319 y2, inc_y );
321 m_ahead,
322 &inv_tau11,
323 y2, inc_y );
324
325 // FLA_Scal( minus_inv_tau11, beta );
326 // FLA_Axpy( beta, a21, z2 );
327 // FLA_Scal( inv_tau11, z2 );
330 m_ahead,
331 &beta,
332 a21, rs_A,
333 z2, inc_z );
335 m_ahead,
336 &inv_tau11,
337 z2, inc_z );
338
339 // FLA_Dot( a12t, a21, dot_product );
340 // FLA_Scal( minus_inv_tau11, dot_product );
341 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
343 m_ahead,
344 a12t, cs_A,
345 a21, rs_A,
346 &dot_product );
349 m_ahead,
351 a21, rs_A,
352 a12t, cs_A );
353
354 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
355 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
358 m_behind,
359 n_ahead,
360 buff_1,
361 A02, rs_A, cs_A,
362 a21, rs_A,
363 buff_0,
364 y0, inc_y );
367 m_behind,
368 n_ahead,
370 y0, inc_y,
371 a21, rs_A,
372 A02, rs_A, cs_A );
373
374 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
377 m_ahead,
378 n_behind,
379 buff_1,
380 A20, rs_A, cs_A,
381 a21, rs_A,
382 buff_0,
383 t01, rs_T );
384
385 // FLA_Copy( first_elem, a21_t );
// Put back the element that was replaced at listing line 250.
386 *a21_t = first_elem;
387 }
388
// True only on the final loop iteration (i == b_alg - 1). Per the commented
// FLAME calls, this applies the two pending rank-1 updates to A22.
389 if ( m_behind + 1 == b_alg && m_ahead > 0 )
390 {
391 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
392 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
394 n_ahead,
395 buff_m1,
396 u2, inc_u,
397 y2, inc_y,
398 z2, inc_z,
399 u2, inc_u,
400 A22, rs_A, cs_A );
401 }
402
403 /*------------------------------------------------------------*/
404
405 }
406
407 // FLA_Obj_free( &u );
408 // FLA_Obj_free( &y );
409 // FLA_Obj_free( &z );
410 // FLA_Obj_free( &v );
411 // FLA_Obj_free( &w );
// Release all five scratch vectors allocated on entry.
412 FLA_free( buff_u );
413 FLA_free( buff_y );
414 FLA_free( buff_z );
415 FLA_free( buff_v );
416 FLA_free( buff_w );
417
418 return FLA_SUCCESS;
419}
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:150
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofs_var4()

FLA_Error FLA_Hess_UT_step_ofs_var4 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T 
)
138{
139 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
143
144 float first_elem, last_elem;
145 float dot_product;
146 float beta, conj_beta;
147 float inv_tau11;
148 float minus_inv_tau11;
149 int i;
150
151 // b_alg = FLA_Obj_length( T );
152 int b_alg = m_T;
153
154 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157 float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158 int inc_e = 1;
159
160 // FLA_Set( FLA_ZERO, Y );
161 // FLA_Set( FLA_ZERO, Z );
162 bl1_ssetm( m_A,
163 b_alg,
164 buff_0,
165 buff_Y, rs_Y, cs_Y );
166 bl1_ssetm( m_A,
167 b_alg,
168 buff_0,
169 buff_Z, rs_Z, cs_Z );
170
171 for ( i = 0; i < b_alg; ++i )
172 {
173 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
174 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
175 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
176 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
177 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
178 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
179 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
180
181 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
182 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
183 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
184
185 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
186 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
187 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
188
189 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
190 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
191
192 float* e0 = buff_e + (0 )*inc_e;
193
194 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
195
196 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
197 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
198
199 float* ABL = a10t;
200 float* ZBL = z10t;
201
202 float* a2 = alpha11;
203
204 int m_ahead = m_A - i - 1;
205 int n_ahead = m_A - i - 1;
206 int m_behind = i;
207 int n_behind = i;
208
209 /*------------------------------------------------------------*/
210
211 if ( m_behind > 0 )
212 {
213 // FLA_Copy( a10t_r, last_elem );
214 // FLA_Set( FLA_ONE, a10t_r );
215 last_elem = *a10t_r;
216 *a10t_r = *buff_1;
217 }
218
219 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
220 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
223 m_ahead + 1,
224 n_behind,
225 buff_m1,
226 ABL, rs_A, cs_A,
227 y10t, cs_Y,
228 buff_1,
229 a2, rs_A );
232 m_ahead + 1,
233 n_behind,
234 buff_m1,
235 ZBL, rs_Z, cs_Z,
236 a10t, cs_A,
237 buff_1,
238 a2, rs_A );
239
240 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
241 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
244 m_ahead,
245 n_behind,
246 buff_m1,
247 Y20, rs_Y, cs_Y,
248 a10t, cs_A,
249 buff_1,
250 a12t, cs_A );
253 m_ahead,
254 n_behind,
255 buff_m1,
256 A20, rs_A, cs_A,
257 z10t, cs_Z,
258 buff_1,
259 a12t, cs_A );
260
261 if ( m_behind > 0 )
262 {
263 // FLA_Copy( last_elem, a10t_r );
264 *a10t_r = last_elem;
265 }
266
267 if ( m_ahead > 0 )
268 {
269 // FLA_Househ2_UT( FLA_LEFT,
270 // a21_t,
271 // a21_b, tau11 );
273 a21_t,
274 a21_b, rs_A,
275 tau11 );
276
277 // FLA_Set( FLA_ONE, inv_tau11 );
278 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
279 // FLA_Copy( inv_tau11, minus_inv_tau11 );
280 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
283
284 // FLA_Copy( a21_t, first_elem );
285 // FLA_Set( FLA_ONE, a21_t );
286 first_elem = *a21_t;
287 *a21_t = *buff_1;
288
289 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
290 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
292 n_ahead,
293 A22, rs_A, cs_A,
294 a21, rs_A,
295 y21, rs_Y,
296 z21, rs_Z );
297
298 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
299 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
300 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
301 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
302 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
303 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
304 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
305 // FLA_Copy( d0, t01 );
307 n_behind,
308 buff_m1,
309 A20, rs_A, cs_A,
310 Y20, rs_Y, cs_Y,
311 Z20, rs_Z, cs_Z,
312 t01, rs_T,
313 a21, rs_A,
314 y21, rs_Y,
315 z21, rs_Z );
316
317 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
318 // FLA_Inv_scal( FLA_TWO, beta );
319 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
321 m_ahead,
322 a21, rs_A,
323 z21, rs_Z,
324 &beta );
327
328 // FLA_Scal( minus_inv_tau11, conj_beta );
329 // FLA_Axpy( conj_beta, a21, y21 );
330 // FLA_Scal( inv_tau11, y21 );
333 m_ahead,
334 &conj_beta,
335 a21, rs_A,
336 y21, rs_Y );
338 m_ahead,
339 &inv_tau11,
340 y21, rs_Y );
341
342 // FLA_Scal( minus_inv_tau11, beta );
343 // FLA_Axpy( beta, a21, z21 );
344 // FLA_Scal( inv_tau11, z21 );
347 m_ahead,
348 &beta,
349 a21, rs_A,
350 z21, rs_Z );
352 m_ahead,
353 &inv_tau11,
354 z21, rs_Z );
355
356 // FLA_Dot( a12t, a21, dot_product );
357 // FLA_Scal( minus_inv_tau11, dot_product );
358 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
360 m_ahead,
361 a12t, cs_A,
362 a21, rs_A,
363 &dot_product );
366 m_ahead,
368 a21, rs_A,
369 a12t, cs_A );
370
371 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
372 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
375 m_behind,
376 n_ahead,
377 buff_1,
378 A02, rs_A, cs_A,
379 a21, rs_A,
380 buff_0,
381 e0, inc_e );
384 m_behind,
385 n_ahead,
387 e0, inc_e,
388 a21, rs_A,
389 A02, rs_A, cs_A );
390
391 // FLA_Copy( first_elem, a21_t );
392 *a21_t = first_elem;
393 }
394
395 /*------------------------------------------------------------*/
396
397 }
398
399 // FLA_Obj_free( &e );
400 FLA_free( buff_e );
401
402 return FLA_SUCCESS;
403}
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:156
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Uhu_Yhu_Zhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofu_var1()

FLA_Error FLA_Hess_UT_step_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Hess_UT_step_ofu_var2()

FLA_Error FLA_Hess_UT_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:295
FLA_Error FLA_Hess_UT_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:97
FLA_Error FLA_Hess_UT_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:493
FLA_Error FLA_Hess_UT_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var2.c:691
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174

References FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blf_var2(), and FLA_Hess_UT_ofu_var2().

◆ FLA_Hess_UT_step_ofu_var3()

FLA_Error FLA_Hess_UT_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_ofs_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:97
FLA_Error FLA_Hess_UT_step_ofz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:1075
FLA_Error FLA_Hess_UT_step_ofd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:423
FLA_Error FLA_Hess_UT_step_ofc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var3.c:749

References FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blf_var3(), and FLA_Hess_UT_ofu_var3().

◆ FLA_Hess_UT_step_ofu_var4()

FLA_Error FLA_Hess_UT_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Datatype datatype;
32 int m_A, m_T;
33 int rs_A, cs_A;
34 int rs_Y, cs_Y;
35 int rs_Z, cs_Z;
36 int rs_T, cs_T;
37
38 datatype = FLA_Obj_datatype( A );
39
40 m_A = FLA_Obj_length( A );
41 m_T = FLA_Obj_length( T );
42
45
48
51
54
55
56 switch ( datatype )
57 {
58 case FLA_FLOAT:
59 {
60 float* buff_A = FLA_FLOAT_PTR( A );
61 float* buff_Y = FLA_FLOAT_PTR( Y );
62 float* buff_Z = FLA_FLOAT_PTR( Z );
63 float* buff_T = FLA_FLOAT_PTR( T );
64
66 m_T,
70 buff_T, rs_T, cs_T );
71
72 break;
73 }
74
75 case FLA_DOUBLE:
76 {
77 double* buff_A = FLA_DOUBLE_PTR( A );
78 double* buff_Y = FLA_DOUBLE_PTR( Y );
79 double* buff_Z = FLA_DOUBLE_PTR( Z );
80 double* buff_T = FLA_DOUBLE_PTR( T );
81
83 m_T,
87 buff_T, rs_T, cs_T );
88
89 break;
90 }
91
92 case FLA_COMPLEX:
93 {
98
100 m_T,
101 buff_A, rs_A, cs_A,
102 buff_Y, rs_Y, cs_Y,
103 buff_Z, rs_Z, cs_Z,
104 buff_T, rs_T, cs_T );
105
106 break;
107 }
108
110 {
115
117 m_T,
118 buff_A, rs_A, cs_A,
119 buff_Y, rs_Y, cs_Y,
120 buff_Z, rs_Z, cs_Z,
121 buff_T, rs_T, cs_T );
122
123 break;
124 }
125 }
126
127 return FLA_SUCCESS;
128}
FLA_Error FLA_Hess_UT_step_ofz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:957
FLA_Error FLA_Hess_UT_step_ofs_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:132
FLA_Error FLA_Hess_UT_step_ofd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:407
FLA_Error FLA_Hess_UT_step_ofc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_fus_var4.c:682

References FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blf_var4(), and FLA_Hess_UT_ofu_var4().

◆ FLA_Hess_UT_step_ofz_var1()

FLA_Error FLA_Hess_UT_step_ofz_var1 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofz_var2()

FLA_Error FLA_Hess_UT_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
695{
700
706 int i;
707
708 // b_alg = FLA_Obj_length( T );
709 int b_alg = m_T;
710
711 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
712 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
713 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
714 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
715 int inc_y = 1;
716 int inc_z = 1;
717
718 for ( i = 0; i < b_alg; ++i )
719 {
720 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
721 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
722 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
723 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
724 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
725
726 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
727 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
728
729 dcomplex* y0 = buff_y + (0 )*inc_y;
730 dcomplex* y2 = buff_y + (i+1)*inc_y;
731
732 dcomplex* z2 = buff_z + (i+1)*inc_z;
733
734 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
735 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
736
737 int m_ahead = m_A - i - 1;
738 int n_ahead = m_A - i - 1;
739 int m_behind = i;
740 int n_behind = i;
741
742 /*------------------------------------------------------------*/
743
744 if ( m_ahead > 0 )
745 {
746 // FLA_Househ2_UT( FLA_LEFT,
747 // a21_t,
748 // a21_b, tau11 );
750 a21_t,
751 a21_b, rs_A,
752 tau11 );
753
754 // FLA_Set( FLA_ONE, inv_tau11 );
755 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
756 // FLA_Copy( inv_tau11, minus_inv_tau11 );
757 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
760
761 // FLA_Copy( a21_t, first_elem );
762 // FLA_Set( FLA_ONE, a21_t );
763 first_elem = *a21_t;
764 *a21_t = *buff_1;
765
766 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
767 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
769 n_ahead,
770 A22, rs_A, cs_A,
771 a21, rs_A,
772 y2, inc_y,
773 z2, inc_z );
774
775 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
776 // FLA_Inv_scal( FLA_TWO, beta );
777 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
779 m_ahead,
780 a21, rs_A,
781 z2, inc_z,
782 &beta );
785
786 // FLA_Scal( minus_inv_tau11, conj_beta );
787 // FLA_Axpy( conj_beta, a21, y2 );
788 // FLA_Scal( inv_tau11, y2 );
791 m_ahead,
792 &conj_beta,
793 a21, rs_A,
794 y2, inc_y );
796 m_ahead,
797 &inv_tau11,
798 y2, inc_y );
799
800 // FLA_Scal( minus_inv_tau11, beta );
801 // FLA_Axpy( beta, a21, z2 );
802 // FLA_Scal( inv_tau11, z2 );
805 m_ahead,
806 &beta,
807 a21, rs_A,
808 z2, inc_z );
810 m_ahead,
811 &inv_tau11,
812 z2, inc_z );
813
814 // FLA_Dot( a12t, a21, dot_product );
815 // FLA_Scal( minus_inv_tau11, dot_product );
816 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
818 m_ahead,
819 a12t, cs_A,
820 a21, rs_A,
821 &dot_product );
824 m_ahead,
826 a21, rs_A,
827 a12t, cs_A );
828
829 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
830 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
833 m_behind,
834 n_ahead,
835 buff_1,
836 A02, rs_A, cs_A,
837 a21, rs_A,
838 buff_0,
839 y0, inc_y );
842 m_behind,
843 n_ahead,
845 y0, inc_y,
846 a21, rs_A,
847 A02, rs_A, cs_A );
848
849 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
850 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
852 n_ahead,
853 buff_m1,
854 a21, rs_A,
855 y2, inc_y,
856 z2, inc_z,
857 a21, rs_A,
858 A22, rs_A, cs_A );
859
860 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
863 m_ahead,
864 n_behind,
865 buff_1,
866 A20, rs_A, cs_A,
867 a21, rs_A,
868 buff_0,
869 t01, rs_T );
870
871 // FLA_Copy( first_elem, a21_t );
872 *a21_t = first_elem;
873 }
874
875 /*------------------------------------------------------------*/
876
877 }
878
879 // FLA_Obj_free( &y );
880 // FLA_Obj_free( &z );
881 FLA_free( buff_y );
882 FLA_free( buff_z );
883
884 return FLA_SUCCESS;
885}
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofz_var3()

FLA_Error FLA_Hess_UT_step_ofz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
1079{
1084
1093 int i;
1094
1095 // b_alg = FLA_Obj_length( T );
1096 int b_alg = m_T;
1097
1098 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1099 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
1100 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1101 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
1102 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1103 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1104 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1105 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1106 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1107 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1108 int inc_u = 1;
1109 int inc_y = 1;
1110 int inc_z = 1;
1111 int inc_v = 1;
1112 int inc_w = 1;
1113
1114 for ( i = 0; i < b_alg; ++i )
1115 {
1116 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1117 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1118 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1119 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1120 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1121 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1122
1123 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1124 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1125
1126 dcomplex* upsilon1 = buff_u + (i )*inc_u;
1127 dcomplex* u2 = buff_u + (i+1)*inc_u;
1128
1129 dcomplex* y0 = buff_y + (0 )*inc_y;
1130 dcomplex* psi1 = buff_y + (i )*inc_y;
1131 dcomplex* y2 = buff_y + (i+1)*inc_y;
1132
1133 dcomplex* zeta1 = buff_z + (i )*inc_z;
1134 dcomplex* z2 = buff_z + (i+1)*inc_z;
1135
1136 dcomplex* v2 = buff_v + (i+1)*inc_v;
1137
1138 dcomplex* w2 = buff_w + (i+1)*inc_w;
1139
1140 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1141 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1142
1143 int m_ahead = m_A - i - 1;
1144 int n_ahead = m_A - i - 1;
1145 int m_behind = i;
1146 int n_behind = i;
1147
1148 /*------------------------------------------------------------*/
1149
1150 if ( m_behind > 0 )
1151 {
1152 // FLA_Copy( upsilon1, minus_upsilon1 );
1153 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
1154 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
1157
1158 // FLA_Copy( psi1, minus_psi1 );
1159 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
1160 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
1163
1164 // FLA_Copy( zeta1, minus_zeta1 );
1165 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
1167
1168 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
1169 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
1171 1,
1173 psi1, 1,
1174 alpha11, 1 );
1176 1,
1177 &minus_zeta1,
1178 upsilon1, 1,
1179 alpha11, 1 );
1180
1181 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
1182 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
1184 m_ahead,
1186 y2, inc_y,
1187 a12t, cs_A );
1189 m_ahead,
1190 &minus_zeta1,
1191 u2, inc_u,
1192 a12t, cs_A );
1193
1194 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
1195 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
1197 m_ahead,
1199 u2, inc_u,
1200 a21, rs_A );
1202 m_ahead,
1204 z2, inc_z,
1205 a21, rs_A );
1206 }
1207
1208 if ( m_ahead > 0 )
1209 {
1210 // FLA_Househ2_UT( FLA_LEFT,
1211 // a21_t,
1212 // a21_b, tau11 );
1214 a21_t,
1215 a21_b, rs_A,
1216 tau11 );
1217
1218 // FLA_Set( FLA_ONE, inv_tau11 );
1219 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1220 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1221 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1224
1225 // FLA_Copy( a21_t, first_elem );
1226 // FLA_Set( FLA_ONE, a21_t );
1227 first_elem = *a21_t;
1228 *a21_t = *buff_1;
1229 }
1230
1231 if ( m_behind > 0 && m_ahead > 0 )
1232 {
1233 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1234 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1235 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1236 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1238 n_ahead,
1239 buff_m1,
1240 u2, inc_u,
1241 y2, inc_y,
1242 z2, inc_z,
1243 A22, rs_A, cs_A,
1244 a21, rs_A,
1245 v2, inc_v,
1246 w2, inc_w );
1247 }
1248 else if ( m_ahead > 0 )
1249 {
1250 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1251 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1253 n_ahead,
1254 A22, rs_A, cs_A,
1255 a21, rs_A,
1256 v2, inc_v,
1257 w2, inc_w );
1258 }
1259
1260 if ( m_ahead > 0 )
1261 {
1262 // FLA_Copy( a21, u2 );
1263 // FLA_Copy( v2, y2 );
1264 // FLA_Copy( w2, z2 );
1266 m_ahead,
1267 a21, rs_A,
1268 u2, inc_u );
1270 m_ahead,
1271 v2, inc_v,
1272 y2, inc_y );
1274 m_ahead,
1275 w2, inc_w,
1276 z2, inc_z );
1277
1278 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1279 // FLA_Inv_scal( FLA_TWO, beta );
1280 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1282 m_ahead,
1283 a21, rs_A,
1284 z2, inc_z,
1285 &beta );
1288
1289 // FLA_Scal( minus_inv_tau11, conj_beta );
1290 // FLA_Axpy( conj_beta, a21, y2 );
1291 // FLA_Scal( inv_tau11, y2 );
1294 m_ahead,
1295 &conj_beta,
1296 a21, rs_A,
1297 y2, inc_y );
1299 m_ahead,
1300 &inv_tau11,
1301 y2, inc_y );
1302
1303 // FLA_Scal( minus_inv_tau11, beta );
1304 // FLA_Axpy( beta, a21, z2 );
1305 // FLA_Scal( inv_tau11, z2 );
1308 m_ahead,
1309 &beta,
1310 a21, rs_A,
1311 z2, inc_z );
1313 m_ahead,
1314 &inv_tau11,
1315 z2, inc_z );
1316
1317 // FLA_Dot( a12t, a21, dot_product );
1318 // FLA_Scal( minus_inv_tau11, dot_product );
1319 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1321 m_ahead,
1322 a12t, cs_A,
1323 a21, rs_A,
1324 &dot_product );
1327 m_ahead,
1328 &dot_product,
1329 a21, rs_A,
1330 a12t, cs_A );
1331
1332 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1333 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1336 m_behind,
1337 n_ahead,
1338 buff_1,
1339 A02, rs_A, cs_A,
1340 a21, rs_A,
1341 buff_0,
1342 y0, inc_y );
1345 m_behind,
1346 n_ahead,
1348 y0, inc_y,
1349 a21, rs_A,
1350 A02, rs_A, cs_A );
1351
1352 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1355 m_ahead,
1356 n_behind,
1357 buff_1,
1358 A20, rs_A, cs_A,
1359 a21, rs_A,
1360 buff_0,
1361 t01, rs_T );
1362
1363 // FLA_Copy( first_elem, a21_t );
1364 *a21_t = first_elem;
1365 }
1366
1367 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1368 {
1369 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1370 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1372 n_ahead,
1373 buff_m1,
1374 u2, inc_u,
1375 y2, inc_y,
1376 z2, inc_z,
1377 u2, inc_u,
1378 A22, rs_A, cs_A );
1379 }
1380
1381 /*------------------------------------------------------------*/
1382
1383 }
1384
1385 // FLA_Obj_free( &u );
1386 // FLA_Obj_free( &y );
1387 // FLA_Obj_free( &z );
1388 // FLA_Obj_free( &v );
1389 // FLA_Obj_free( &w );
1390 FLA_free( buff_u );
1391 FLA_free( buff_y );
1392 FLA_free( buff_z );
1393 FLA_free( buff_v );
1394 FLA_free( buff_w );
1395
1396 return FLA_SUCCESS;
1397}
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:421
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofz_var4()

FLA_Error FLA_Hess_UT_step_ofz_var4 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
963{
968
974 int i;
975
976 // b_alg = FLA_Obj_length( T );
977 int b_alg = m_T;
978
979 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
980 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
981 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
982 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
983 int inc_e = 1;
984
985 // FLA_Set( FLA_ZERO, Y );
986 // FLA_Set( FLA_ZERO, Z );
987 bl1_zsetm( m_A,
988 b_alg,
989 buff_0,
990 buff_Y, rs_Y, cs_Y );
991 bl1_zsetm( m_A,
992 b_alg,
993 buff_0,
994 buff_Z, rs_Z, cs_Z );
995
996 for ( i = 0; i < b_alg; ++i )
997 {
998 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
999 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1000 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1001 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1002 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1003 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1004 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1005
1006 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1007 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1008 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1009
1010 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1011 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1012 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1013
1014 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1015 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1016
1017 dcomplex* e0 = buff_e + (0 )*inc_e;
1018
1019 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
1020
1021 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1022 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1023
1024 dcomplex* ABL = a10t;
1025 dcomplex* ZBL = z10t;
1026
1027 dcomplex* a2 = alpha11;
1028
1029 int m_ahead = m_A - i - 1;
1030 int n_ahead = m_A - i - 1;
1031 int m_behind = i;
1032 int n_behind = i;
1033
1034 /*------------------------------------------------------------*/
1035
1036 if ( m_behind > 0 )
1037 {
1038 // FLA_Copy( a10t_r, last_elem );
1039 // FLA_Set( FLA_ONE, a10t_r );
1040 last_elem = *a10t_r;
1041 *a10t_r = *buff_1;
1042 }
1043
1044 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1045 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
1048 m_ahead + 1,
1049 n_behind,
1050 buff_m1,
1051 ABL, rs_A, cs_A,
1052 y10t, cs_Y,
1053 buff_1,
1054 a2, rs_A );
1057 m_ahead + 1,
1058 n_behind,
1059 buff_m1,
1060 ZBL, rs_Z, cs_Z,
1061 a10t, cs_A,
1062 buff_1,
1063 a2, rs_A );
1064
1065 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1066 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
1069 m_ahead,
1070 n_behind,
1071 buff_m1,
1072 Y20, rs_Y, cs_Y,
1073 a10t, cs_A,
1074 buff_1,
1075 a12t, cs_A );
1078 m_ahead,
1079 n_behind,
1080 buff_m1,
1081 A20, rs_A, cs_A,
1082 z10t, cs_Z,
1083 buff_1,
1084 a12t, cs_A );
1085
1086 if ( m_behind > 0 )
1087 {
1088 // FLA_Copy( last_elem, a10t_r );
1089 *a10t_r = last_elem;
1090 }
1091
1092 if ( m_ahead > 0 )
1093 {
1094 // FLA_Househ2_UT( FLA_LEFT,
1095 // a21_t,
1096 // a21_b, tau11 );
1098 a21_t,
1099 a21_b, rs_A,
1100 tau11 );
1101
1102 // FLA_Set( FLA_ONE, inv_tau11 );
1103 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1104 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1105 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1108
1109 // FLA_Copy( a21_t, first_elem );
1110 // FLA_Set( FLA_ONE, a21_t );
1111 first_elem = *a21_t;
1112 *a21_t = *buff_1;
1113
1114 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1115 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1117 n_ahead,
1118 A22, rs_A, cs_A,
1119 a21, rs_A,
1120 y21, rs_Y,
1121 z21, rs_Z );
1122
1123 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1124 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1125 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1126 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1127 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1128 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1129 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1130 // FLA_Copy( d0, t01 );
1132 n_behind,
1133 buff_m1,
1134 A20, rs_A, cs_A,
1135 Y20, rs_Y, cs_Y,
1136 Z20, rs_Z, cs_Z,
1137 t01, rs_T,
1138 a21, rs_A,
1139 y21, rs_Y,
1140 z21, rs_Z );
1141
1142 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1143 // FLA_Inv_scal( FLA_TWO, beta );
1144 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1146 m_ahead,
1147 a21, rs_A,
1148 z21, rs_Z,
1149 &beta );
1152
1153 // FLA_Scal( minus_inv_tau11, conj_beta );
1154 // FLA_Axpy( conj_beta, a21, y21 );
1155 // FLA_Scal( inv_tau11, y21 );
1158 m_ahead,
1159 &conj_beta,
1160 a21, rs_A,
1161 y21, rs_Y );
1163 m_ahead,
1164 &inv_tau11,
1165 y21, rs_Y );
1166
1167 // FLA_Scal( minus_inv_tau11, beta );
1168 // FLA_Axpy( beta, a21, z21 );
1169 // FLA_Scal( inv_tau11, z21 );
1172 m_ahead,
1173 &beta,
1174 a21, rs_A,
1175 z21, rs_Z );
1177 m_ahead,
1178 &inv_tau11,
1179 z21, rs_Z );
1180
1181 // FLA_Dot( a12t, a21, dot_product );
1182 // FLA_Scal( minus_inv_tau11, dot_product );
1183 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1185 m_ahead,
1186 a12t, cs_A,
1187 a21, rs_A,
1188 &dot_product );
1191 m_ahead,
1192 &dot_product,
1193 a21, rs_A,
1194 a12t, cs_A );
1195
1196 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1197 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1200 m_behind,
1201 n_ahead,
1202 buff_1,
1203 A02, rs_A, cs_A,
1204 a21, rs_A,
1205 buff_0,
1206 e0, inc_e );
1209 m_behind,
1210 n_ahead,
1212 e0, inc_e,
1213 a21, rs_A,
1214 A02, rs_A, cs_A );
1215
1216 // FLA_Copy( first_elem, a21_t );
1217 *a21_t = first_elem;
1218 }
1219
1220 /*------------------------------------------------------------*/
1221
1222 }
1223
1224 // FLA_Obj_free( &e );
1225 FLA_free( buff_e );
1226
1227 return FLA_SUCCESS;
1228}
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:500
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Uhu_Yhu_Zhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_opc_var1()

FLA_Error FLA_Hess_UT_step_opc_var1 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
287{
290
292 int i;
293
294 // b_alg = FLA_Obj_length( T );
295 int b_alg = m_T;
296
297 for ( i = 0; i < b_alg; ++i )
298 {
299 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
300 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
301
302 scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
303 scomplex* a21_b = buff_A + (i )*cs_A + (i+2)*rs_A;
304
305 scomplex* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
306 scomplex* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;
307
308 scomplex* A2_l = buff_A + (i+1)*cs_A + (0 )*rs_A;
309 scomplex* A2_r = buff_A + (i+2)*cs_A + (0 )*rs_A;
310
311 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
312 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
313
314 int m_ahead = m_A - i - 1;
315 int n_ahead = m_A - i - 1;
316 int n_behind = i;
317
318 /*------------------------------------------------------------*/
319
320 if ( m_ahead > 0 )
321 {
322 // FLA_Househ2_UT( FLA_LEFT,
323 // a21_t,
324 // a21_b, tau11 );
326 a21_t,
327 a21_b, rs_A,
328 tau11 );
329
330 // FLA_Copy( a21_t, first_elem );
331 // FLA_Set( FLA_ONE, a21_t );
332 first_elem = *a21_t;
333 *a21_t = *buff_1;
334
335 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
336 // A22_b );
338 n_ahead,
339 tau11,
340 a21_b, rs_A,
341 A22_t, cs_A,
342 A22_b, rs_A, cs_A );
343
344 // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
346 n_ahead - 1,
347 tau11,
348 a21_b, rs_A,
349 A2_l, rs_A,
350 A2_r, rs_A, cs_A );
351
352 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
355 m_ahead,
356 n_behind,
357 buff_1,
358 A20, rs_A, cs_A,
359 a21, rs_A,
360 buff_0,
361 t01, rs_T );
362
363 // FLA_Copy( first_elem, a21_t );
364 *a21_t = first_elem;
365 }
366
367 /*------------------------------------------------------------*/
368
369 }
370
371 return FLA_SUCCESS;
372}
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Apply_H2_UT_r_opc_var1(int n_u2h_A2, int m_a1, scomplex *tau, scomplex *u2h, int inc_u2h, scomplex *a1, int inc_a1, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:254

References bl1_cgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_opc_var2()

FLA_Error FLA_Hess_UT_step_opc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
539{
544
550 int i;
551
552 // b_alg = FLA_Obj_length( T );
553 int b_alg = m_T;
554
555 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
556 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
557 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
558 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
559 int inc_y = 1;
560 int inc_z = 1;
561
562 for ( i = 0; i < b_alg; ++i )
563 {
564 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
565 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
566 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
567 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
568 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
569
570 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
571 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
572
573 scomplex* y0 = buff_y + (0 )*inc_y;
574 scomplex* y2 = buff_y + (i+1)*inc_y;
575
576 scomplex* z2 = buff_z + (i+1)*inc_z;
577
578 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
579 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
580
581 int m_ahead = m_A - i - 1;
582 int n_ahead = m_A - i - 1;
583 int m_behind = i;
584 int n_behind = i;
585
586 /*------------------------------------------------------------*/
587
588 if ( m_ahead > 0 )
589 {
590 // FLA_Househ2_UT( FLA_LEFT,
591 // a21_t,
592 // a21_b, tau11 );
594 a21_t,
595 a21_b, rs_A,
596 tau11 );
597
598 // FLA_Set( FLA_ONE, inv_tau11 );
599 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
600 // FLA_Copy( inv_tau11, minus_inv_tau11 );
601 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
604
605 // FLA_Copy( a21_t, first_elem );
606 // FLA_Set( FLA_ONE, a21_t );
607 first_elem = *a21_t;
608 *a21_t = *buff_1;
609
610 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
613 m_ahead,
614 n_ahead,
615 buff_1,
616 A22, rs_A, cs_A,
617 a21, rs_A,
618 buff_0,
619 y2, inc_y );
620
621 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
624 m_ahead,
625 n_ahead,
626 buff_1,
627 A22, rs_A, cs_A,
628 a21, rs_A,
629 buff_0,
630 z2, inc_z );
631
632 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
633 // FLA_Inv_scal( FLA_TWO, beta );
634 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
636 m_ahead,
637 a21, rs_A,
638 z2, inc_z,
639 &beta );
642
643 // FLA_Scal( minus_inv_tau11, conj_beta );
644 // FLA_Axpy( conj_beta, a21, y2 );
645 // FLA_Scal( inv_tau11, y2 );
648 m_ahead,
649 &conj_beta,
650 a21, rs_A,
651 y2, inc_y );
653 m_ahead,
654 &inv_tau11,
655 y2, inc_y );
656
657 // FLA_Scal( minus_inv_tau11, beta );
658 // FLA_Axpy( beta, a21, z2 );
659 // FLA_Scal( inv_tau11, z2 );
662 m_ahead,
663 &beta,
664 a21, rs_A,
665 z2, inc_z );
667 m_ahead,
668 &inv_tau11,
669 z2, inc_z );
670
671 // FLA_Dot( a12t, a21, dot_product );
672 // FLA_Scal( minus_inv_tau11, dot_product );
673 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
675 m_ahead,
676 a12t, cs_A,
677 a21, rs_A,
678 &dot_product );
681 m_ahead,
683 a21, rs_A,
684 a12t, cs_A );
685
686 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
687 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
690 m_behind,
691 n_ahead,
692 buff_1,
693 A02, rs_A, cs_A,
694 a21, rs_A,
695 buff_0,
696 y0, inc_y );
699 m_behind,
700 n_ahead,
702 y0, inc_y,
703 a21, rs_A,
704 A02, rs_A, cs_A );
705
706 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
707 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
710 m_ahead,
711 n_ahead,
712 buff_m1,
713 a21, rs_A,
714 y2, inc_y,
715 A22, rs_A, cs_A );
718 m_ahead,
719 n_ahead,
720 buff_m1,
721 z2, inc_z,
722 a21, rs_A,
723 A22, rs_A, cs_A );
724
725 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
728 m_ahead,
729 n_behind,
730 buff_1,
731 A20, rs_A, cs_A,
732 a21, rs_A,
733 buff_0,
734 t01, rs_T );
735
736 // FLA_Copy( first_elem, a21_t );
737 *a21_t = first_elem;
738 }
739
740 /*------------------------------------------------------------*/
741
742 }
743
744 // FLA_Obj_free( &y );
745 // FLA_Obj_free( &z );
746 FLA_free( buff_y );
747 FLA_free( buff_z );
748
749 return FLA_SUCCESS;
750}

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opc_var3()

FLA_Error FLA_Hess_UT_step_opc_var3 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
807{
812
821 int i;
822
823 // b_alg = FLA_Obj_length( T );
824 int b_alg = m_T;
825
826 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
827 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
828 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
829 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
830 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
831 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
832 scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
833 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
834 scomplex* buff_v = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
835 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
836 int inc_u = 1;
837 int inc_y = 1;
838 int inc_z = 1;
839 int inc_v = 1;
840 int inc_w = 1;
841
842 // Initialize some variables (only to prevent compiler warnings).
845
846 for ( i = 0; i < b_alg; ++i )
847 {
848 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
849 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
850 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
851 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
852 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
853 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
854
855 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
856 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
857
859 scomplex* u2 = buff_u + (i+1)*inc_u;
860
861 scomplex* y0 = buff_y + (0 )*inc_y;
862 scomplex* psi1 = buff_y + (i )*inc_y;
863 scomplex* y2 = buff_y + (i+1)*inc_y;
864
865 scomplex* zeta1 = buff_z + (i )*inc_z;
866 scomplex* z2 = buff_z + (i+1)*inc_z;
867
868 scomplex* v2 = buff_v + (i+1)*inc_v;
869
870 scomplex* w2 = buff_w + (i+1)*inc_w;
871
872 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
873 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
874
875 int m_ahead = m_A - i - 1;
876 int n_ahead = m_A - i - 1;
877 int m_behind = i;
878 int n_behind = i;
879
880 /*------------------------------------------------------------*/
881
882 if ( m_behind > 0 )
883 {
884 // FLA_Copy( upsilon1, minus_upsilon1 );
885 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
886 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
889
890 // FLA_Copy( psi1, minus_psi1 );
891 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
892 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
895
896 // FLA_Copy( zeta1, minus_zeta1 );
897 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
899
900 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
901 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
903 1,
905 psi1, 1,
906 alpha11, 1 );
908 1,
910 upsilon1, 1,
911 alpha11, 1 );
912
913 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
914 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
916 m_ahead,
918 y2, inc_y,
919 a12t, cs_A );
921 m_ahead,
923 u2, inc_u,
924 a12t, cs_A );
925
926 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
927 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
929 m_ahead,
931 u2, inc_u,
932 a21, rs_A );
934 m_ahead,
936 z2, inc_z,
937 a21, rs_A );
938 }
939
940 if ( m_ahead > 0 )
941 {
942 // FLA_Househ2_UT( FLA_LEFT,
943 // a21_t,
944 // a21_b, tau11 );
946 a21_t,
947 a21_b, rs_A,
948 tau11 );
949
950 // FLA_Set( FLA_ONE, inv_tau11 );
951 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
952 // FLA_Copy( inv_tau11, minus_inv_tau11 );
953 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
956
957 // FLA_Copy( a21_t, first_elem );
958 // FLA_Set( FLA_ONE, a21_t );
959 first_elem = *a21_t;
960 *a21_t = *buff_1;
961 }
962
963 if ( m_behind > 0 )
964 {
965 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
966 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
969 m_ahead,
970 n_ahead,
971 buff_m1,
972 u2, inc_u,
973 y2, inc_y,
974 A22, rs_A, cs_A );
977 m_ahead,
978 n_ahead,
979 buff_m1,
980 z2, inc_z,
981 u2, inc_u,
982 A22, rs_A, cs_A );
983 }
984
985 if ( m_ahead > 0 )
986 {
987 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
990 m_ahead,
991 n_ahead,
992 buff_1,
993 A22, rs_A, cs_A,
994 a21, rs_A,
995 buff_0,
996 v2, inc_v );
997
998 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1001 m_ahead,
1002 n_ahead,
1003 buff_1,
1004 A22, rs_A, cs_A,
1005 a21, rs_A,
1006 buff_0,
1007 w2, inc_w );
1008
1009 // FLA_Copy( a21, u2 );
1010 // FLA_Copy( v2, y2 );
1011 // FLA_Copy( w2, z2 );
1013 m_ahead,
1014 a21, rs_A,
1015 u2, inc_u );
1017 m_ahead,
1018 v2, inc_v,
1019 y2, inc_y );
1021 m_ahead,
1022 w2, inc_w,
1023 z2, inc_z );
1024
1025 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1026 // FLA_Inv_scal( FLA_TWO, beta );
1027 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1029 m_ahead,
1030 a21, rs_A,
1031 z2, inc_z,
1032 &beta );
1035
1036 // FLA_Scal( minus_inv_tau11, conj_beta );
1037 // FLA_Axpy( conj_beta, a21, y2 );
1038 // FLA_Scal( inv_tau11, y2 );
1041 m_ahead,
1042 &conj_beta,
1043 a21, rs_A,
1044 y2, inc_y );
1046 m_ahead,
1047 &inv_tau11,
1048 y2, inc_y );
1049
1050 // FLA_Scal( minus_inv_tau11, beta );
1051 // FLA_Axpy( beta, a21, z2 );
1052 // FLA_Scal( inv_tau11, z2 );
1055 m_ahead,
1056 &beta,
1057 a21, rs_A,
1058 z2, inc_z );
1060 m_ahead,
1061 &inv_tau11,
1062 z2, inc_z );
1063
1064 // FLA_Dot( a12t, a21, dot_product );
1065 // FLA_Scal( minus_inv_tau11, dot_product );
1066 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1068 m_ahead,
1069 a12t, cs_A,
1070 a21, rs_A,
1071 &dot_product );
1074 m_ahead,
1075 &dot_product,
1076 a21, rs_A,
1077 a12t, cs_A );
1078
1079 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1080 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1083 m_behind,
1084 n_ahead,
1085 buff_1,
1086 A02, rs_A, cs_A,
1087 a21, rs_A,
1088 buff_0,
1089 y0, inc_y );
1092 m_behind,
1093 n_ahead,
1095 y0, inc_y,
1096 a21, rs_A,
1097 A02, rs_A, cs_A );
1098
1099 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1102 m_ahead,
1103 n_behind,
1104 buff_1,
1105 A20, rs_A, cs_A,
1106 a21, rs_A,
1107 buff_0,
1108 t01, rs_T );
1109
1110 // FLA_Copy( first_elem, a21_t );
1111 *a21_t = first_elem;
1112 }
1113
1114 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1115 {
1116 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1117 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1120 m_ahead,
1121 n_ahead,
1122 buff_m1,
1123 u2, inc_u,
1124 y2, inc_y,
1125 A22, rs_A, cs_A );
1128 m_ahead,
1129 n_ahead,
1130 buff_m1,
1131 z2, inc_z,
1132 u2, inc_u,
1133 A22, rs_A, cs_A );
1134 }
1135
1136 /*------------------------------------------------------------*/
1137
1138 }
1139
1140 // FLA_Obj_free( &u );
1141 // FLA_Obj_free( &y );
1142 // FLA_Obj_free( &z );
1143 // FLA_Obj_free( &v );
1144 // FLA_Obj_free( &w );
1145 FLA_free( buff_u );
1146 FLA_free( buff_y );
1147 FLA_free( buff_z );
1148 FLA_free( buff_v );
1149 FLA_free( buff_w );
1150
1151 return FLA_SUCCESS;
1152}

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opc_var4()

FLA_Error FLA_Hess_UT_step_opc_var4 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
858{
863
869 int i;
870
871 // b_alg = FLA_Obj_length( T );
872 int b_alg = m_T;
873
874 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
875 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
876 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
877 scomplex* buff_d = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
878 scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
879 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
880 int inc_d = 1;
881 int inc_e = 1;
882 int inc_f = 1;
883
884 // FLA_Set( FLA_ZERO, Y );
885 // FLA_Set( FLA_ZERO, Z );
886 bl1_csetm( m_A,
887 b_alg,
888 buff_0,
889 buff_Y, rs_Y, cs_Y );
890 bl1_csetm( m_A,
891 b_alg,
892 buff_0,
893 buff_Z, rs_Z, cs_Z );
894
895 for ( i = 0; i < b_alg; ++i )
896 {
897 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
898 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
899 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
900 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
901 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
902 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
903 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
904
905 scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
906 scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
907 scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
908
909 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
910 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
911 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
912
913 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
914 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
915
916 scomplex* d0 = buff_d + (0 )*inc_d;
917
918 scomplex* e0 = buff_e + (0 )*inc_e;
919
920 scomplex* f0 = buff_f + (0 )*inc_f;
921
922 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
923
924 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
925 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
926
927 scomplex* ABL = a10t;
928 scomplex* ZBL = z10t;
929
931
932 int m_ahead = m_A - i - 1;
933 int n_ahead = m_A - i - 1;
934 int m_behind = i;
935 int n_behind = i;
936
937 /*------------------------------------------------------------*/
938
939 if ( m_behind > 0 )
940 {
941 // FLA_Copy( a10t_r, last_elem );
942 // FLA_Set( FLA_ONE, a10t_r );
943 last_elem = *a10t_r;
944 *a10t_r = *buff_1;
945 }
946
947 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
948 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
951 m_ahead + 1,
952 n_behind,
953 buff_m1,
954 ABL, rs_A, cs_A,
955 y10t, cs_Y,
956 buff_1,
957 a2, rs_A );
960 m_ahead + 1,
961 n_behind,
962 buff_m1,
963 ZBL, rs_Z, cs_Z,
964 a10t, cs_A,
965 buff_1,
966 a2, rs_A );
967
968 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
969 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
972 m_ahead,
973 n_behind,
974 buff_m1,
975 Y20, rs_Y, cs_Y,
976 a10t, cs_A,
977 buff_1,
978 a12t, cs_A );
981 m_ahead,
982 n_behind,
983 buff_m1,
984 A20, rs_A, cs_A,
985 z10t, cs_Z,
986 buff_1,
987 a12t, cs_A );
988
989 if ( m_behind > 0 )
990 {
991 // FLA_Copy( last_elem, a10t_r );
992 *a10t_r = last_elem;
993 }
994
995 if ( m_ahead > 0 )
996 {
997 // FLA_Househ2_UT( FLA_LEFT,
998 // a21_t,
999 // a21_b, tau11 );
1001 a21_t,
1002 a21_b, rs_A,
1003 tau11 );
1004
1005 // FLA_Set( FLA_ONE, inv_tau11 );
1006 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1007 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1008 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1011
1012 // FLA_Copy( a21_t, first_elem );
1013 // FLA_Set( FLA_ONE, a21_t );
1014 first_elem = *a21_t;
1015 *a21_t = *buff_1;
1016
1017 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1020 m_ahead,
1021 n_ahead,
1022 buff_1,
1023 A22, rs_A, cs_A,
1024 a21, rs_A,
1025 buff_0,
1026 y21, rs_Y );
1027
1028 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1031 m_ahead,
1032 n_ahead,
1033 buff_1,
1034 A22, rs_A, cs_A,
1035 a21, rs_A,
1036 buff_0,
1037 z21, rs_Z );
1038
1039 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1040 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1041 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1044 m_ahead,
1045 n_behind,
1046 buff_1,
1047 A20, rs_A, cs_A,
1048 a21, rs_A,
1049 buff_0,
1050 d0, inc_d );
1053 m_ahead,
1054 n_behind,
1055 buff_1,
1056 Y20, rs_Y, cs_Y,
1057 a21, rs_A,
1058 buff_0,
1059 e0, inc_e );
1062 m_ahead,
1063 n_behind,
1064 buff_1,
1065 Z20, rs_Z, cs_Z,
1066 a21, rs_A,
1067 buff_0,
1068 f0, inc_f );
1069
1070 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1071 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1074 m_ahead,
1075 n_behind,
1076 buff_m1,
1077 Y20, rs_Y, cs_Y,
1078 d0, inc_d,
1079 buff_1,
1080 y21, rs_Y );
1083 m_ahead,
1084 n_behind,
1085 buff_m1,
1086 A20, rs_A, cs_A,
1087 f0, inc_f,
1088 buff_1,
1089 y21, rs_Y );
1090
1091 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1092 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1095 m_ahead,
1096 n_behind,
1097 buff_m1,
1098 A20, rs_A, cs_A,
1099 e0, inc_e,
1100 buff_1,
1101 z21, rs_Z );
1104 m_ahead,
1105 n_behind,
1106 buff_m1,
1107 Z20, rs_Z, cs_Z,
1108 d0, inc_d,
1109 buff_1,
1110 z21, rs_Z );
1111
1112 // FLA_Copy( d0, t01 );
1114 n_behind,
1115 d0, inc_d,
1116 t01, rs_T );
1117
1118 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1119 // FLA_Inv_scal( FLA_TWO, beta );
1120 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1122 m_ahead,
1123 a21, rs_A,
1124 z21, rs_Z,
1125 &beta );
1128
1129 // FLA_Scal( minus_inv_tau11, conj_beta );
1130 // FLA_Axpy( conj_beta, a21, y21 );
1131 // FLA_Scal( inv_tau11, y21 );
1134 m_ahead,
1135 &conj_beta,
1136 a21, rs_A,
1137 y21, rs_Y );
1139 m_ahead,
1140 &inv_tau11,
1141 y21, rs_Y );
1142
1143 // FLA_Scal( minus_inv_tau11, beta );
1144 // FLA_Axpy( beta, a21, z21 );
1145 // FLA_Scal( inv_tau11, z21 );
1148 m_ahead,
1149 &beta,
1150 a21, rs_A,
1151 z21, rs_Z );
1153 m_ahead,
1154 &inv_tau11,
1155 z21, rs_Z );
1156
1157 // FLA_Dot( a12t, a21, dot_product );
1158 // FLA_Scal( minus_inv_tau11, dot_product );
1159 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1161 m_ahead,
1162 a12t, cs_A,
1163 a21, rs_A,
1164 &dot_product );
1167 m_ahead,
1168 &dot_product,
1169 a21, rs_A,
1170 a12t, cs_A );
1171
1172 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1173 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1176 m_behind,
1177 n_ahead,
1178 buff_1,
1179 A02, rs_A, cs_A,
1180 a21, rs_A,
1181 buff_0,
1182 e0, inc_e );
1185 m_behind,
1186 n_ahead,
1188 e0, inc_e,
1189 a21, rs_A,
1190 A02, rs_A, cs_A );
1191
1192 // FLA_Copy( first_elem, a21_t );
1193 *a21_t = first_elem;
1194 }
1195
1196 /*------------------------------------------------------------*/
1197
1198 }
1199
1200 // FLA_Obj_free( &d );
1201 // FLA_Obj_free( &e );
1202 // FLA_Obj_free( &f );
1203 FLA_free( buff_d );
1204 FLA_free( buff_e );
1205 FLA_free( buff_f );
1206
1207 return FLA_SUCCESS;
1208}

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opc_var5()

FLA_Error FLA_Hess_UT_step_opc_var5 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_U,
int  rs_U,
int  cs_U,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
644{
648 int i;
649
650 // b_alg = FLA_Obj_length( T );
651 int b_alg = m_T;
652
653 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
654 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
655 int inc_w = 1;
656
657 // FLA_Set( FLA_ZERO, U );
658 // FLA_Set( FLA_ZERO, Z );
659 bl1_csetm( m_A,
660 b_alg,
661 buff_0,
662 buff_U, rs_U, cs_U );
663 bl1_csetm( m_A,
664 b_alg,
665 buff_0,
666 buff_Z, rs_Z, cs_Z );
667
668 for ( i = 0; i < b_alg; ++i )
669 {
670 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
671 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
672 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
673 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
674 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
675 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
676
677 scomplex* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
678 scomplex* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
679 scomplex* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
680 scomplex* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
681
682 scomplex* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
683 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
684 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
685 scomplex* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
686 scomplex* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
687 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
688
689 scomplex* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
690 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
691 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
692
693 scomplex* w0 = buff_w + (0 )*inc_w;
694
695 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
696 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
697
698 scomplex* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
699
700 int m_ahead = m_A - i - 1;
701 int n_ahead = m_A - i - 1;
702 int m_behind = i;
703 int n_behind = i;
704
705 /*------------------------------------------------------------*/
706
707 if ( m_behind > 0 )
708 {
709 // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
710 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
711 // T00, w0 );
713 m_behind,
714 u10t, cs_U,
715 w0, inc_w );
719 m_behind,
720 T00, rs_T, cs_T,
721 w0, inc_w );
722
723 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
724 // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
725 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
728 m_behind,
729 n_behind,
730 buff_m1,
731 Z00, rs_Z, cs_Z,
732 w0, inc_w,
733 buff_1,
734 a01, rs_A );
736 m_behind,
737 buff_m1,
738 z10t, cs_Z,
739 w0, inc_w,
740 buff_1,
741 alpha11 );
744 m_ahead,
745 n_behind,
746 buff_m1,
747 Z20, rs_Z, cs_Z,
748 w0, inc_w,
749 buff_1,
750 a21, rs_A );
751
752 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
753 // FLA_ONE, U00, a01, FLA_ZERO, w0 );
754 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
755 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
757 m_behind,
758 a01, rs_A,
759 w0, inc_w );
763 m_behind,
764 U00, rs_U, cs_U,
765 w0, inc_w );
767 m_behind,
768 alpha11,
769 u10t, cs_U,
770 w0, inc_w );
773 m_ahead,
774 n_behind,
775 buff_1,
776 U20, rs_U, cs_U,
777 a21, rs_A,
778 buff_1,
779 w0, inc_w );
780
781 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
782 // T00, w0 );
786 m_behind,
787 T00, rs_T, cs_T,
788 w0, inc_w );
789
790 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
791 // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
792 // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
793 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
797 m_behind,
798 buff_m1,
799 U00, rs_U, cs_U,
800 w0, inc_w,
801 buff_1,
802 a01, rs_A );
804 m_behind,
805 buff_m1,
806 u10t, cs_U,
807 w0, inc_w,
808 buff_1,
809 alpha11 );
812 m_ahead,
813 n_behind,
814 buff_m1,
815 U20, rs_U, cs_U,
816 w0, inc_w,
817 buff_1,
818 a21, rs_A );
819 }
820
821 if ( m_ahead > 0 )
822 {
823 // FLA_Househ2_UT( FLA_LEFT,
824 // a21_t,
825 // a21_b, tau11 );
827 a21_t,
828 a21_b, rs_A,
829 tau11 );
830
831 // FLA_Copy( a21, u21 );
833 m_ahead,
834 a21, rs_A,
835 u21, rs_U );
836
837 // FLA_Set( FLA_ONE, u21_t );
838 *u21_t = *buff_1;
839
840 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
841 // FLA_Dot( a12t, u21, zeta11 );
842 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
845 m_behind,
846 n_ahead,
847 buff_1,
848 A02, rs_A, cs_A,
849 u21, rs_U,
850 buff_0,
851 z01, rs_Z );
853 m_ahead,
854 a12t, cs_A,
855 u21, rs_U,
856 zeta11 );
859 m_ahead,
860 n_ahead,
861 buff_1,
862 A22, rs_A, cs_A,
863 u21, rs_U,
864 buff_0,
865 z21, rs_Z );
866
867 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
870 m_ahead,
871 n_behind,
872 buff_1,
873 U20, rs_U, cs_U,
874 u21, rs_U,
875 buff_0,
876 t01, rs_T );
877 }
878
879 /*------------------------------------------------------------*/
880
881 }
882
883 // FLA_Obj_free( &w );
884 FLA_free( buff_w );
885
886 return FLA_SUCCESS;
887}
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition bl1_dots.c:39
void bl1_ctrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition bl1_trmv.c:99
void bl1_ctrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_trmvsx.c:129
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition bl1_trsv.c:99
@ BLIS1_LOWER_TRIANGULAR
Definition blis_type_defs.h:62
@ BLIS1_UPPER_TRIANGULAR
Definition blis_type_defs.h:63
@ BLIS1_NONUNIT_DIAG
Definition blis_type_defs.h:74

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cdots(), bl1_cgemv(), bl1_csetm(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_opd_var1()

FLA_Error FLA_Hess_UT_step_opd_var1 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
194{
195 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
196 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
197
198 double first_elem;
199 int i;
200
201 // b_alg = FLA_Obj_length( T );
202 int b_alg = m_T;
203
204 for ( i = 0; i < b_alg; ++i )
205 {
206 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
207 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
208
209 double* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
210 double* a21_b = buff_A + (i )*cs_A + (i+2)*rs_A;
211
212 double* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
213 double* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;
214
215 double* A2_l = buff_A + (i+1)*cs_A + (0 )*rs_A;
216 double* A2_r = buff_A + (i+2)*cs_A + (0 )*rs_A;
217
218 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
219 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
220
221 int m_ahead = m_A - i - 1;
222 int n_ahead = m_A - i - 1;
223 int n_behind = i;
224
225 /*------------------------------------------------------------*/
226
227 if ( m_ahead > 0 )
228 {
229 // FLA_Househ2_UT( FLA_LEFT,
230 // a21_t,
231 // a21_b, tau11 );
233 a21_t,
234 a21_b, rs_A,
235 tau11 );
236
237 // FLA_Copy( a21_t, first_elem );
238 // FLA_Set( FLA_ONE, a21_t );
239 first_elem = *a21_t;
240 *a21_t = *buff_1;
241
242 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
243 // A22_b );
245 n_ahead,
246 tau11,
247 a21_b, rs_A,
248 A22_t, cs_A,
249 A22_b, rs_A, cs_A );
250
251 // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
253 n_ahead - 1,
254 tau11,
255 a21_b, rs_A,
256 A2_l, rs_A,
257 A2_r, rs_A, cs_A );
258
259 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
262 m_ahead,
263 n_behind,
264 buff_1,
265 A20, rs_A, cs_A,
266 a21, rs_A,
267 buff_0,
268 t01, rs_T );
269
270 // FLA_Copy( first_elem, a21_t );
271 *a21_t = first_elem;
272 }
273
274 /*------------------------------------------------------------*/
275
276 }
277
278 return FLA_SUCCESS;
279}
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:195
FLA_Error FLA_Apply_H2_UT_r_opd_var1(int n_u2h_A2, int m_a1, double *tau, double *u2h, int inc_u2h, double *a1, int inc_a1, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:181

References bl1_dgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_opd_var2()

FLA_Error FLA_Hess_UT_step_opd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
320{
321 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
322 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
323 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
324 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
325
326 double first_elem;
327 double dot_product;
328 double beta, conj_beta;
329 double inv_tau11;
330 double minus_inv_tau11;
331 int i;
332
333 // b_alg = FLA_Obj_length( T );
334 int b_alg = m_T;
335
336 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
337 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
338 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
339 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
340 int inc_y = 1;
341 int inc_z = 1;
342
343 for ( i = 0; i < b_alg; ++i )
344 {
345 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
346 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350
351 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353
354 double* y0 = buff_y + (0 )*inc_y;
355 double* y2 = buff_y + (i+1)*inc_y;
356
357 double* z2 = buff_z + (i+1)*inc_z;
358
359 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
360 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
361
362 int m_ahead = m_A - i - 1;
363 int n_ahead = m_A - i - 1;
364 int m_behind = i;
365 int n_behind = i;
366
367 /*------------------------------------------------------------*/
368
369 if ( m_ahead > 0 )
370 {
371 // FLA_Househ2_UT( FLA_LEFT,
372 // a21_t,
373 // a21_b, tau11 );
375 a21_t,
376 a21_b, rs_A,
377 tau11 );
378
379 // FLA_Set( FLA_ONE, inv_tau11 );
380 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
381 // FLA_Copy( inv_tau11, minus_inv_tau11 );
382 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
385
386 // FLA_Copy( a21_t, first_elem );
387 // FLA_Set( FLA_ONE, a21_t );
388 first_elem = *a21_t;
389 *a21_t = *buff_1;
390
391 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
394 m_ahead,
395 n_ahead,
396 buff_1,
397 A22, rs_A, cs_A,
398 a21, rs_A,
399 buff_0,
400 y2, inc_y );
401
402 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
405 m_ahead,
406 n_ahead,
407 buff_1,
408 A22, rs_A, cs_A,
409 a21, rs_A,
410 buff_0,
411 z2, inc_z );
412
413 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
414 // FLA_Inv_scal( FLA_TWO, beta );
415 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
417 m_ahead,
418 a21, rs_A,
419 z2, inc_z,
420 &beta );
423
424 // FLA_Scal( minus_inv_tau11, conj_beta );
425 // FLA_Axpy( conj_beta, a21, y2 );
426 // FLA_Scal( inv_tau11, y2 );
429 m_ahead,
430 &conj_beta,
431 a21, rs_A,
432 y2, inc_y );
434 m_ahead,
435 &inv_tau11,
436 y2, inc_y );
437
438 // FLA_Scal( minus_inv_tau11, beta );
439 // FLA_Axpy( beta, a21, z2 );
440 // FLA_Scal( inv_tau11, z2 );
443 m_ahead,
444 &beta,
445 a21, rs_A,
446 z2, inc_z );
448 m_ahead,
449 &inv_tau11,
450 z2, inc_z );
451
452 // FLA_Dot( a12t, a21, dot_product );
453 // FLA_Scal( minus_inv_tau11, dot_product );
454 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
456 m_ahead,
457 a12t, cs_A,
458 a21, rs_A,
459 &dot_product );
462 m_ahead,
464 a21, rs_A,
465 a12t, cs_A );
466
467 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
468 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
471 m_behind,
472 n_ahead,
473 buff_1,
474 A02, rs_A, cs_A,
475 a21, rs_A,
476 buff_0,
477 y0, inc_y );
480 m_behind,
481 n_ahead,
483 y0, inc_y,
484 a21, rs_A,
485 A02, rs_A, cs_A );
486
487 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
488 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
491 m_ahead,
492 n_ahead,
493 buff_m1,
494 a21, rs_A,
495 y2, inc_y,
496 A22, rs_A, cs_A );
499 m_ahead,
500 n_ahead,
501 buff_m1,
502 z2, inc_z,
503 a21, rs_A,
504 A22, rs_A, cs_A );
505
506 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
509 m_ahead,
510 n_behind,
511 buff_1,
512 A20, rs_A, cs_A,
513 a21, rs_A,
514 buff_0,
515 t01, rs_T );
516
517 // FLA_Copy( first_elem, a21_t );
518 *a21_t = first_elem;
519 }
520
521 /*------------------------------------------------------------*/
522
523 }
524
525 // FLA_Obj_free( &y );
526 // FLA_Obj_free( &z );
527 FLA_free( buff_y );
528 FLA_free( buff_z );
529
530 return FLA_SUCCESS;
531}

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opd_var3()

FLA_Error FLA_Hess_UT_step_opd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
454{
455 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
456 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
457 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
458 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
459
460 double first_elem;
461 double dot_product;
462 double beta, conj_beta;
463 double inv_tau11;
464 double minus_inv_tau11;
467 double minus_zeta1;
468 int i;
469
470 // b_alg = FLA_Obj_length( T );
471 int b_alg = m_T;
472
473 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
474 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
475 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
476 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
477 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
478 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
479 double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
480 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
481 double* buff_v = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
482 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
483 int inc_u = 1;
484 int inc_y = 1;
485 int inc_z = 1;
486 int inc_v = 1;
487 int inc_w = 1;
488
489 // Initialize some variables (only to prevent compiler warnings).
492
493 for ( i = 0; i < b_alg; ++i )
494 {
495 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
496 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
497 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
498 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
499 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
500 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
501
502 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504
505 double* upsilon1 = buff_u + (i )*inc_u;
506 double* u2 = buff_u + (i+1)*inc_u;
507
508 double* y0 = buff_y + (0 )*inc_y;
509 double* psi1 = buff_y + (i )*inc_y;
510 double* y2 = buff_y + (i+1)*inc_y;
511
512 double* zeta1 = buff_z + (i )*inc_z;
513 double* z2 = buff_z + (i+1)*inc_z;
514
515 double* v2 = buff_v + (i+1)*inc_v;
516
517 double* w2 = buff_w + (i+1)*inc_w;
518
519 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
520 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
521
522 int m_ahead = m_A - i - 1;
523 int n_ahead = m_A - i - 1;
524 int m_behind = i;
525 int n_behind = i;
526
527 /*------------------------------------------------------------*/
528
529 if ( m_behind > 0 )
530 {
531 // FLA_Copy( upsilon1, minus_upsilon1 );
532 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
533 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
536
537 // FLA_Copy( psi1, minus_psi1 );
538 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
539 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
542
543 // FLA_Copy( zeta1, minus_zeta1 );
544 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
546
547 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
548 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
550 1,
552 psi1, 1,
553 alpha11, 1 );
555 1,
557 upsilon1, 1,
558 alpha11, 1 );
559
560 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
561 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
563 m_ahead,
565 y2, inc_y,
566 a12t, cs_A );
568 m_ahead,
570 u2, inc_u,
571 a12t, cs_A );
572
573 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
574 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
576 m_ahead,
578 u2, inc_u,
579 a21, rs_A );
581 m_ahead,
583 z2, inc_z,
584 a21, rs_A );
585 }
586
587 if ( m_ahead > 0 )
588 {
589 // FLA_Househ2_UT( FLA_LEFT,
590 // a21_t,
591 // a21_b, tau11 );
593 a21_t,
594 a21_b, rs_A,
595 tau11 );
596
597 // FLA_Set( FLA_ONE, inv_tau11 );
598 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
599 // FLA_Copy( inv_tau11, minus_inv_tau11 );
600 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
603
604 // FLA_Copy( a21_t, first_elem );
605 // FLA_Set( FLA_ONE, a21_t );
606 first_elem = *a21_t;
607 *a21_t = *buff_1;
608 }
609
610 if ( m_behind > 0 )
611 {
612 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
613 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
616 m_ahead,
617 n_ahead,
618 buff_m1,
619 u2, inc_u,
620 y2, inc_y,
621 A22, rs_A, cs_A );
624 m_ahead,
625 n_ahead,
626 buff_m1,
627 z2, inc_z,
628 u2, inc_u,
629 A22, rs_A, cs_A );
630 }
631
632 if ( m_ahead > 0 )
633 {
634 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
637 m_ahead,
638 n_ahead,
639 buff_1,
640 A22, rs_A, cs_A,
641 a21, rs_A,
642 buff_0,
643 v2, inc_v );
644
645 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
648 m_ahead,
649 n_ahead,
650 buff_1,
651 A22, rs_A, cs_A,
652 a21, rs_A,
653 buff_0,
654 w2, inc_w );
655
656 // FLA_Copy( a21, u2 );
657 // FLA_Copy( v2, y2 );
658 // FLA_Copy( w2, z2 );
660 m_ahead,
661 a21, rs_A,
662 u2, inc_u );
664 m_ahead,
665 v2, inc_v,
666 y2, inc_y );
668 m_ahead,
669 w2, inc_w,
670 z2, inc_z );
671
672 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
673 // FLA_Inv_scal( FLA_TWO, beta );
674 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
676 m_ahead,
677 a21, rs_A,
678 z2, inc_z,
679 &beta );
682
683 // FLA_Scal( minus_inv_tau11, conj_beta );
684 // FLA_Axpy( conj_beta, a21, y2 );
685 // FLA_Scal( inv_tau11, y2 );
688 m_ahead,
689 &conj_beta,
690 a21, rs_A,
691 y2, inc_y );
693 m_ahead,
694 &inv_tau11,
695 y2, inc_y );
696
697 // FLA_Scal( minus_inv_tau11, beta );
698 // FLA_Axpy( beta, a21, z2 );
699 // FLA_Scal( inv_tau11, z2 );
702 m_ahead,
703 &beta,
704 a21, rs_A,
705 z2, inc_z );
707 m_ahead,
708 &inv_tau11,
709 z2, inc_z );
710
711 // FLA_Dot( a12t, a21, dot_product );
712 // FLA_Scal( minus_inv_tau11, dot_product );
713 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
715 m_ahead,
716 a12t, cs_A,
717 a21, rs_A,
718 &dot_product );
721 m_ahead,
723 a21, rs_A,
724 a12t, cs_A );
725
726 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
727 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
730 m_behind,
731 n_ahead,
732 buff_1,
733 A02, rs_A, cs_A,
734 a21, rs_A,
735 buff_0,
736 y0, inc_y );
739 m_behind,
740 n_ahead,
742 y0, inc_y,
743 a21, rs_A,
744 A02, rs_A, cs_A );
745
746 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
749 m_ahead,
750 n_behind,
751 buff_1,
752 A20, rs_A, cs_A,
753 a21, rs_A,
754 buff_0,
755 t01, rs_T );
756
757 // FLA_Copy( first_elem, a21_t );
758 *a21_t = first_elem;
759 }
760
761 if ( m_behind + 1 == b_alg && m_ahead > 0 )
762 {
763 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
764 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
767 m_ahead,
768 n_ahead,
769 buff_m1,
770 u2, inc_u,
771 y2, inc_y,
772 A22, rs_A, cs_A );
775 m_ahead,
776 n_ahead,
777 buff_m1,
778 z2, inc_z,
779 u2, inc_u,
780 A22, rs_A, cs_A );
781 }
782
783 /*------------------------------------------------------------*/
784
785 }
786
787 // FLA_Obj_free( &u );
788 // FLA_Obj_free( &y );
789 // FLA_Obj_free( &z );
790 // FLA_Obj_free( &v );
791 // FLA_Obj_free( &w );
792 FLA_free( buff_u );
793 FLA_free( buff_y );
794 FLA_free( buff_z );
795 FLA_free( buff_v );
796 FLA_free( buff_w );
797
798 return FLA_SUCCESS;
799}

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opd_var4()

FLA_Error FLA_Hess_UT_step_opd_var4 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
498{
499 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
500 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
501 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
502 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
503
504 double first_elem, last_elem;
505 double dot_product;
506 double beta, conj_beta;
507 double inv_tau11;
508 double minus_inv_tau11;
509 int i;
510
511 // b_alg = FLA_Obj_length( T );
512 int b_alg = m_T;
513
514 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
515 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
516 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
517 double* buff_d = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
518 double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
519 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
520 int inc_d = 1;
521 int inc_e = 1;
522 int inc_f = 1;
523
524 // FLA_Set( FLA_ZERO, Y );
525 // FLA_Set( FLA_ZERO, Z );
526 bl1_dsetm( m_A,
527 b_alg,
528 buff_0,
529 buff_Y, rs_Y, cs_Y );
530 bl1_dsetm( m_A,
531 b_alg,
532 buff_0,
533 buff_Z, rs_Z, cs_Z );
534
535 for ( i = 0; i < b_alg; ++i )
536 {
537 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
538 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
539 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
540 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
541 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
542 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
543 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
544
545 double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
546 double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
547 double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
548
549 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
550 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
551 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
552
553 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
554 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
555
556 double* d0 = buff_d + (0 )*inc_d;
557
558 double* e0 = buff_e + (0 )*inc_e;
559
560 double* f0 = buff_f + (0 )*inc_f;
561
562 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
563
564 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
565 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
566
567 double* ABL = a10t;
568 double* ZBL = z10t;
569
570 double* a2 = alpha11;
571
572 int m_ahead = m_A - i - 1;
573 int n_ahead = m_A - i - 1;
574 int m_behind = i;
575 int n_behind = i;
576
577 /*------------------------------------------------------------*/
578
579 if ( m_behind > 0 )
580 {
581 // FLA_Copy( a10t_r, last_elem );
582 // FLA_Set( FLA_ONE, a10t_r );
583 last_elem = *a10t_r;
584 *a10t_r = *buff_1;
585 }
586
587 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
588 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
591 m_ahead + 1,
592 n_behind,
593 buff_m1,
594 ABL, rs_A, cs_A,
595 y10t, cs_Y,
596 buff_1,
597 a2, rs_A );
600 m_ahead + 1,
601 n_behind,
602 buff_m1,
603 ZBL, rs_Z, cs_Z,
604 a10t, cs_A,
605 buff_1,
606 a2, rs_A );
607
608 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
609 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
612 m_ahead,
613 n_behind,
614 buff_m1,
615 Y20, rs_Y, cs_Y,
616 a10t, cs_A,
617 buff_1,
618 a12t, cs_A );
621 m_ahead,
622 n_behind,
623 buff_m1,
624 A20, rs_A, cs_A,
625 z10t, cs_Z,
626 buff_1,
627 a12t, cs_A );
628
629 if ( m_behind > 0 )
630 {
631 // FLA_Copy( last_elem, a10t_r );
632 *a10t_r = last_elem;
633 }
634
635 if ( m_ahead > 0 )
636 {
637 // FLA_Househ2_UT( FLA_LEFT,
638 // a21_t,
639 // a21_b, tau11 );
641 a21_t,
642 a21_b, rs_A,
643 tau11 );
644
645 // FLA_Set( FLA_ONE, inv_tau11 );
646 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
647 // FLA_Copy( inv_tau11, minus_inv_tau11 );
648 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
651
652 // FLA_Copy( a21_t, first_elem );
653 // FLA_Set( FLA_ONE, a21_t );
654 first_elem = *a21_t;
655 *a21_t = *buff_1;
656
657 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
660 m_ahead,
661 n_ahead,
662 buff_1,
663 A22, rs_A, cs_A,
664 a21, rs_A,
665 buff_0,
666 y21, rs_Y );
667
668 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
671 m_ahead,
672 n_ahead,
673 buff_1,
674 A22, rs_A, cs_A,
675 a21, rs_A,
676 buff_0,
677 z21, rs_Z );
678
679 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
680 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
681 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
684 m_ahead,
685 n_behind,
686 buff_1,
687 A20, rs_A, cs_A,
688 a21, rs_A,
689 buff_0,
690 d0, inc_d );
693 m_ahead,
694 n_behind,
695 buff_1,
696 Y20, rs_Y, cs_Y,
697 a21, rs_A,
698 buff_0,
699 e0, inc_e );
702 m_ahead,
703 n_behind,
704 buff_1,
705 Z20, rs_Z, cs_Z,
706 a21, rs_A,
707 buff_0,
708 f0, inc_f );
709
710 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
711 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
714 m_ahead,
715 n_behind,
716 buff_m1,
717 Y20, rs_Y, cs_Y,
718 d0, inc_d,
719 buff_1,
720 y21, rs_Y );
723 m_ahead,
724 n_behind,
725 buff_m1,
726 A20, rs_A, cs_A,
727 f0, inc_f,
728 buff_1,
729 y21, rs_Y );
730
731 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
732 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
735 m_ahead,
736 n_behind,
737 buff_m1,
738 A20, rs_A, cs_A,
739 e0, inc_e,
740 buff_1,
741 z21, rs_Z );
744 m_ahead,
745 n_behind,
746 buff_m1,
747 Z20, rs_Z, cs_Z,
748 d0, inc_d,
749 buff_1,
750 z21, rs_Z );
751
752 // FLA_Copy( d0, t01 );
754 n_behind,
755 d0, inc_d,
756 t01, rs_T );
757
758 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
759 // FLA_Inv_scal( FLA_TWO, beta );
760 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
762 m_ahead,
763 a21, rs_A,
764 z21, rs_Z,
765 &beta );
768
769 // FLA_Scal( minus_inv_tau11, conj_beta );
770 // FLA_Axpy( conj_beta, a21, y21 );
771 // FLA_Scal( inv_tau11, y21 );
774 m_ahead,
775 &conj_beta,
776 a21, rs_A,
777 y21, rs_Y );
779 m_ahead,
780 &inv_tau11,
781 y21, rs_Y );
782
783 // FLA_Scal( minus_inv_tau11, beta );
784 // FLA_Axpy( beta, a21, z21 );
785 // FLA_Scal( inv_tau11, z21 );
788 m_ahead,
789 &beta,
790 a21, rs_A,
791 z21, rs_Z );
793 m_ahead,
794 &inv_tau11,
795 z21, rs_Z );
796
797 // FLA_Dot( a12t, a21, dot_product );
798 // FLA_Scal( minus_inv_tau11, dot_product );
799 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
801 m_ahead,
802 a12t, cs_A,
803 a21, rs_A,
804 &dot_product );
807 m_ahead,
809 a21, rs_A,
810 a12t, cs_A );
811
812 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
813 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
816 m_behind,
817 n_ahead,
818 buff_1,
819 A02, rs_A, cs_A,
820 a21, rs_A,
821 buff_0,
822 e0, inc_e );
825 m_behind,
826 n_ahead,
828 e0, inc_e,
829 a21, rs_A,
830 A02, rs_A, cs_A );
831
832 // FLA_Copy( first_elem, a21_t );
833 *a21_t = first_elem;
834 }
835
836 /*------------------------------------------------------------*/
837
838 }
839
840 // FLA_Obj_free( &d );
841 // FLA_Obj_free( &e );
842 // FLA_Obj_free( &f );
843 FLA_free( buff_d );
844 FLA_free( buff_e );
845 FLA_free( buff_f );
846
847 return FLA_SUCCESS;
848}

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opd_var5()

FLA_Error FLA_Hess_UT_step_opd_var5 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
391{
392 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
393 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
394 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
395 int i;
396
397 // b_alg = FLA_Obj_length( T );
398 int b_alg = m_T;
399
400 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
401 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
402 int inc_w = 1;
403
404 // FLA_Set( FLA_ZERO, U );
405 // FLA_Set( FLA_ZERO, Z );
406 bl1_dsetm( m_A,
407 b_alg,
408 buff_0,
409 buff_U, rs_U, cs_U );
410 bl1_dsetm( m_A,
411 b_alg,
412 buff_0,
413 buff_Z, rs_Z, cs_Z );
414
415 for ( i = 0; i < b_alg; ++i )
416 {
417 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
418 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
419 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
420 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
421 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
422 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
423
424 double* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
425 double* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
426 double* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
427 double* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
428
429 double* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
430 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
431 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
432 double* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
433 double* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
434 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
435
436 double* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
437 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
438 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
439
440 double* w0 = buff_w + (0 )*inc_w;
441
442 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
443 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
444
445 double* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
446
447 int m_ahead = m_A - i - 1;
448 int n_ahead = m_A - i - 1;
449 int m_behind = i;
450 int n_behind = i;
451
452 /*------------------------------------------------------------*/
453
454 if ( m_behind > 0 )
455 {
456 // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
457 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
458 // T00, w0 );
460 m_behind,
461 u10t, cs_U,
462 w0, inc_w );
466 m_behind,
467 T00, rs_T, cs_T,
468 w0, inc_w );
469
470 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
471 // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
472 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
475 m_behind,
476 n_behind,
477 buff_m1,
478 Z00, rs_Z, cs_Z,
479 w0, inc_w,
480 buff_1,
481 a01, rs_A );
483 m_behind,
484 buff_m1,
485 z10t, cs_Z,
486 w0, inc_w,
487 buff_1,
488 alpha11 );
491 m_ahead,
492 n_behind,
493 buff_m1,
494 Z20, rs_Z, cs_Z,
495 w0, inc_w,
496 buff_1,
497 a21, rs_A );
498
499 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
500 // FLA_ONE, U00, a01, FLA_ZERO, w0 );
501 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
502 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
504 m_behind,
505 a01, rs_A,
506 w0, inc_w );
510 m_behind,
511 U00, rs_U, cs_U,
512 w0, inc_w );
514 m_behind,
515 alpha11,
516 u10t, cs_U,
517 w0, inc_w );
520 m_ahead,
521 n_behind,
522 buff_1,
523 U20, rs_U, cs_U,
524 a21, rs_A,
525 buff_1,
526 w0, inc_w );
527
528 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
529 // T00, w0 );
533 m_behind,
534 T00, rs_T, cs_T,
535 w0, inc_w );
536
537 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
538 // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
539 // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
540 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
544 m_behind,
545 buff_m1,
546 U00, rs_U, cs_U,
547 w0, inc_w,
548 buff_1,
549 a01, rs_A );
551 m_behind,
552 buff_m1,
553 u10t, cs_U,
554 w0, inc_w,
555 buff_1,
556 alpha11 );
559 m_ahead,
560 n_behind,
561 buff_m1,
562 U20, rs_U, cs_U,
563 w0, inc_w,
564 buff_1,
565 a21, rs_A );
566 }
567
568 if ( m_ahead > 0 )
569 {
570 // FLA_Househ2_UT( FLA_LEFT,
571 // a21_t,
572 // a21_b, tau11 );
574 a21_t,
575 a21_b, rs_A,
576 tau11 );
577
578 // FLA_Copy( a21, u21 );
580 m_ahead,
581 a21, rs_A,
582 u21, rs_U );
583
584 // FLA_Set( FLA_ONE, u21_t );
585 *u21_t = *buff_1;
586
587 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
588 // FLA_Dot( a12t, u21, zeta11 );
589 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
592 m_behind,
593 n_ahead,
594 buff_1,
595 A02, rs_A, cs_A,
596 u21, rs_U,
597 buff_0,
598 z01, rs_Z );
600 m_ahead,
601 a12t, cs_A,
602 u21, rs_U,
603 zeta11 );
606 m_ahead,
607 n_ahead,
608 buff_1,
609 A22, rs_A, cs_A,
610 u21, rs_U,
611 buff_0,
612 z21, rs_Z );
613
614 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
617 m_ahead,
618 n_behind,
619 buff_1,
620 U20, rs_U, cs_U,
621 u21, rs_U,
622 buff_0,
623 t01, rs_T );
624 }
625
626 /*------------------------------------------------------------*/
627
628 }
629
630 // FLA_Obj_free( &w );
631 FLA_free( buff_w );
632
633 return FLA_SUCCESS;
634}
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition bl1_dots.c:26
void bl1_dtrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition bl1_trmv.c:56
void bl1_dtrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_trmvsx.c:71
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition bl1_trsv.c:56

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_ddots(), bl1_dgemv(), bl1_dsetm(), bl1_dtrmv(), bl1_dtrmvsx(), bl1_dtrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_ops_var1()

FLA_Error FLA_Hess_UT_step_ops_var1 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
103 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
104
105 float first_elem;
106 int i;
107
108 // b_alg = FLA_Obj_length( T );
109 int b_alg = m_T;
110
111 for ( i = 0; i < b_alg; ++i )
112 {
113 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
114 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
115
116 float* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
117 float* a21_b = buff_A + (i )*cs_A + (i+2)*rs_A;
118
119 float* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
120 float* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;
121
122 float* A2_l = buff_A + (i+1)*cs_A + (0 )*rs_A;
123 float* A2_r = buff_A + (i+2)*cs_A + (0 )*rs_A;
124
125 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
126 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
127
128 int m_ahead = m_A - i - 1;
129 int n_ahead = m_A - i - 1;
130 int n_behind = i;
131
132 /*------------------------------------------------------------*/
133
134 if ( m_ahead > 0 )
135 {
136 // FLA_Househ2_UT( FLA_LEFT,
137 // a21_t,
138 // a21_b, tau11 );
140 a21_t,
141 a21_b, rs_A,
142 tau11 );
143
144 // FLA_Copy( a21_t, first_elem );
145 // FLA_Set( FLA_ONE, a21_t );
146 first_elem = *a21_t;
147 *a21_t = *buff_1;
148
149 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
150 // A22_b );
152 n_ahead,
153 tau11,
154 a21_b, rs_A,
155 A22_t, cs_A,
156 A22_b, rs_A, cs_A );
157
158 // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
160 n_ahead - 1,
161 tau11,
162 a21_b, rs_A,
163 A2_l, rs_A,
164 A2_r, rs_A, cs_A );
165
166 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
169 m_ahead,
170 n_behind,
171 buff_1,
172 A20, rs_A, cs_A,
173 a21, rs_A,
174 buff_0,
175 t01, rs_T );
176
177 // FLA_Copy( first_elem, a21_t );
178 *a21_t = first_elem;
179 }
180
181 /*------------------------------------------------------------*/
182
183 }
184
185 return FLA_SUCCESS;
186}
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Apply_H2_UT_r_ops_var1(int n_u2h_A2, int m_a1, float *tau, float *u2h, int inc_u2h, float *a1, int inc_a1, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:108

References bl1_sgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_ops_var2()

FLA_Error FLA_Hess_UT_step_ops_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
112 int i;
113
114 // b_alg = FLA_Obj_length( T );
115 int b_alg = m_T;
116
117 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
119 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121 int inc_y = 1;
122 int inc_z = 1;
123
124 for ( i = 0; i < b_alg; ++i )
125 {
126 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131
132 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134
135 float* y0 = buff_y + (0 )*inc_y;
136 float* y2 = buff_y + (i+1)*inc_y;
137
138 float* z2 = buff_z + (i+1)*inc_z;
139
140 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142
143 int m_ahead = m_A - i - 1;
144 int n_ahead = m_A - i - 1;
145 int m_behind = i;
146 int n_behind = i;
147
148 /*------------------------------------------------------------*/
149
150 if ( m_ahead > 0 )
151 {
152 // FLA_Househ2_UT( FLA_LEFT,
153 // a21_t,
154 // a21_b, tau11 );
156 a21_t,
157 a21_b, rs_A,
158 tau11 );
159
160 // FLA_Set( FLA_ONE, inv_tau11 );
161 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162 // FLA_Copy( inv_tau11, minus_inv_tau11 );
163 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
166
167 // FLA_Copy( a21_t, first_elem );
168 // FLA_Set( FLA_ONE, a21_t );
169 first_elem = *a21_t;
170 *a21_t = *buff_1;
171
172 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
175 m_ahead,
176 n_ahead,
177 buff_1,
178 A22, rs_A, cs_A,
179 a21, rs_A,
180 buff_0,
181 y2, inc_y );
182
183 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
186 m_ahead,
187 n_ahead,
188 buff_1,
189 A22, rs_A, cs_A,
190 a21, rs_A,
191 buff_0,
192 z2, inc_z );
193
194 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
195 // FLA_Inv_scal( FLA_TWO, beta );
196 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
198 m_ahead,
199 a21, rs_A,
200 z2, inc_z,
201 &beta );
204
205 // FLA_Scal( minus_inv_tau11, conj_beta );
206 // FLA_Axpy( conj_beta, a21, y2 );
207 // FLA_Scal( inv_tau11, y2 );
210 m_ahead,
211 &conj_beta,
212 a21, rs_A,
213 y2, inc_y );
215 m_ahead,
216 &inv_tau11,
217 y2, inc_y );
218
219 // FLA_Scal( minus_inv_tau11, beta );
220 // FLA_Axpy( beta, a21, z2 );
221 // FLA_Scal( inv_tau11, z2 );
224 m_ahead,
225 &beta,
226 a21, rs_A,
227 z2, inc_z );
229 m_ahead,
230 &inv_tau11,
231 z2, inc_z );
232
233 // FLA_Dot( a12t, a21, dot_product );
234 // FLA_Scal( minus_inv_tau11, dot_product );
235 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
237 m_ahead,
238 a12t, cs_A,
239 a21, rs_A,
240 &dot_product );
243 m_ahead,
245 a21, rs_A,
246 a12t, cs_A );
247
248 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
249 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
252 m_behind,
253 n_ahead,
254 buff_1,
255 A02, rs_A, cs_A,
256 a21, rs_A,
257 buff_0,
258 y0, inc_y );
261 m_behind,
262 n_ahead,
264 y0, inc_y,
265 a21, rs_A,
266 A02, rs_A, cs_A );
267
268 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
269 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
272 m_ahead,
273 n_ahead,
274 buff_m1,
275 a21, rs_A,
276 y2, inc_y,
277 A22, rs_A, cs_A );
280 m_ahead,
281 n_ahead,
282 buff_m1,
283 z2, inc_z,
284 a21, rs_A,
285 A22, rs_A, cs_A );
286
287 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
290 m_ahead,
291 n_behind,
292 buff_1,
293 A20, rs_A, cs_A,
294 a21, rs_A,
295 buff_0,
296 t01, rs_T );
297
298 // FLA_Copy( first_elem, a21_t );
299 *a21_t = first_elem;
300 }
301
302 /*------------------------------------------------------------*/
303
304 }
305
306 // FLA_Obj_free( &y );
307 // FLA_Obj_free( &z );
308 FLA_free( buff_y );
309 FLA_free( buff_z );
310
311 return FLA_SUCCESS;
312}

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_ops_var3()

FLA_Error FLA_Hess_UT_step_ops_var3 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106
107 float first_elem;
108 float dot_product;
109 float beta, conj_beta;
110 float inv_tau11;
111 float minus_inv_tau11;
114 float minus_zeta1;
115 int i;
116
117 // b_alg = FLA_Obj_length( T );
118 int b_alg = m_T;
119
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
121 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
122 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
123 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
124 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
125 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
126 float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
127 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
128 float* buff_v = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
129 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
130 int inc_u = 1;
131 int inc_y = 1;
132 int inc_z = 1;
133 int inc_v = 1;
134 int inc_w = 1;
135
136 // Initialize some variables (only to prevent compiler warnings).
139
140 for ( i = 0; i < b_alg; ++i )
141 {
142 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
143 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
144 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
145 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
146 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
147 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
148
149 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
150 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
151
152 float* upsilon1 = buff_u + (i )*inc_u;
153 float* u2 = buff_u + (i+1)*inc_u;
154
155 float* y0 = buff_y + (0 )*inc_y;
156 float* psi1 = buff_y + (i )*inc_y;
157 float* y2 = buff_y + (i+1)*inc_y;
158
159 float* zeta1 = buff_z + (i )*inc_z;
160 float* z2 = buff_z + (i+1)*inc_z;
161
162 float* v2 = buff_v + (i+1)*inc_v;
163
164 float* w2 = buff_w + (i+1)*inc_w;
165
166 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
167 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
168
169 int m_ahead = m_A - i - 1;
170 int n_ahead = m_A - i - 1;
171 int m_behind = i;
172 int n_behind = i;
173
174 /*------------------------------------------------------------*/
175
176 if ( m_behind > 0 )
177 {
178 // FLA_Copy( upsilon1, minus_upsilon1 );
179 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
180 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
183
184 // FLA_Copy( psi1, minus_psi1 );
185 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
186 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
189
190 // FLA_Copy( zeta1, minus_zeta1 );
191 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
193
194 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
195 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
197 1,
199 psi1, 1,
200 alpha11, 1 );
202 1,
204 upsilon1, 1,
205 alpha11, 1 );
206
207 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
208 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
210 m_ahead,
212 y2, inc_y,
213 a12t, cs_A );
215 m_ahead,
217 u2, inc_u,
218 a12t, cs_A );
219
220 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
221 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
223 m_ahead,
225 u2, inc_u,
226 a21, rs_A );
228 m_ahead,
230 z2, inc_z,
231 a21, rs_A );
232 }
233
234 if ( m_ahead > 0 )
235 {
236 // FLA_Househ2_UT( FLA_LEFT,
237 // a21_t,
238 // a21_b, tau11 );
240 a21_t,
241 a21_b, rs_A,
242 tau11 );
243
244 // FLA_Set( FLA_ONE, inv_tau11 );
245 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
246 // FLA_Copy( inv_tau11, minus_inv_tau11 );
247 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
250
251 // FLA_Copy( a21_t, first_elem );
252 // FLA_Set( FLA_ONE, a21_t );
253 first_elem = *a21_t;
254 *a21_t = *buff_1;
255 }
256
257 if ( m_behind > 0 )
258 {
259 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
260 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
263 m_ahead,
264 n_ahead,
265 buff_m1,
266 u2, inc_u,
267 y2, inc_y,
268 A22, rs_A, cs_A );
271 m_ahead,
272 n_ahead,
273 buff_m1,
274 z2, inc_z,
275 u2, inc_u,
276 A22, rs_A, cs_A );
277 }
278
279 if ( m_ahead > 0 )
280 {
281 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
284 m_ahead,
285 n_ahead,
286 buff_1,
287 A22, rs_A, cs_A,
288 a21, rs_A,
289 buff_0,
290 v2, inc_v );
291
292 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
295 m_ahead,
296 n_ahead,
297 buff_1,
298 A22, rs_A, cs_A,
299 a21, rs_A,
300 buff_0,
301 w2, inc_w );
302
303 // FLA_Copy( a21, u2 );
304 // FLA_Copy( v2, y2 );
305 // FLA_Copy( w2, z2 );
307 m_ahead,
308 a21, rs_A,
309 u2, inc_u );
311 m_ahead,
312 v2, inc_v,
313 y2, inc_y );
315 m_ahead,
316 w2, inc_w,
317 z2, inc_z );
318
319 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
320 // FLA_Inv_scal( FLA_TWO, beta );
321 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
323 m_ahead,
324 a21, rs_A,
325 z2, inc_z,
326 &beta );
329
330 // FLA_Scal( minus_inv_tau11, conj_beta );
331 // FLA_Axpy( conj_beta, a21, y2 );
332 // FLA_Scal( inv_tau11, y2 );
335 m_ahead,
336 &conj_beta,
337 a21, rs_A,
338 y2, inc_y );
340 m_ahead,
341 &inv_tau11,
342 y2, inc_y );
343
344 // FLA_Scal( minus_inv_tau11, beta );
345 // FLA_Axpy( beta, a21, z2 );
346 // FLA_Scal( inv_tau11, z2 );
349 m_ahead,
350 &beta,
351 a21, rs_A,
352 z2, inc_z );
354 m_ahead,
355 &inv_tau11,
356 z2, inc_z );
357
358 // FLA_Dot( a12t, a21, dot_product );
359 // FLA_Scal( minus_inv_tau11, dot_product );
360 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
362 m_ahead,
363 a12t, cs_A,
364 a21, rs_A,
365 &dot_product );
368 m_ahead,
370 a21, rs_A,
371 a12t, cs_A );
372
373 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
374 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
377 m_behind,
378 n_ahead,
379 buff_1,
380 A02, rs_A, cs_A,
381 a21, rs_A,
382 buff_0,
383 y0, inc_y );
386 m_behind,
387 n_ahead,
389 y0, inc_y,
390 a21, rs_A,
391 A02, rs_A, cs_A );
392
393 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
396 m_ahead,
397 n_behind,
398 buff_1,
399 A20, rs_A, cs_A,
400 a21, rs_A,
401 buff_0,
402 t01, rs_T );
403
404 // FLA_Copy( first_elem, a21_t );
405 *a21_t = first_elem;
406 }
407
408 if ( m_behind + 1 == b_alg && m_ahead > 0 )
409 {
410 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
411 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
414 m_ahead,
415 n_ahead,
416 buff_m1,
417 u2, inc_u,
418 y2, inc_y,
419 A22, rs_A, cs_A );
422 m_ahead,
423 n_ahead,
424 buff_m1,
425 z2, inc_z,
426 u2, inc_u,
427 A22, rs_A, cs_A );
428 }
429
430 /*------------------------------------------------------------*/
431
432 }
433
434 // FLA_Obj_free( &u );
435 // FLA_Obj_free( &y );
436 // FLA_Obj_free( &z );
437 // FLA_Obj_free( &v );
438 // FLA_Obj_free( &w );
439 FLA_free( buff_u );
440 FLA_free( buff_y );
441 FLA_free( buff_z );
442 FLA_free( buff_v );
443 FLA_free( buff_w );
444
445 return FLA_SUCCESS;
446}

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_ops_var4()

FLA_Error FLA_Hess_UT_step_ops_var4 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T 
)
138{
139 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
142 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
143
144 float first_elem, last_elem;
145 float dot_product;
146 float beta, conj_beta;
147 float inv_tau11;
148 float minus_inv_tau11;
149 int i;
150
151 // b_alg = FLA_Obj_length( T );
152 int b_alg = m_T;
153
154 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157 float* buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158 float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
159 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
160 int inc_d = 1;
161 int inc_e = 1;
162 int inc_f = 1;
163
164 // FLA_Set( FLA_ZERO, Y );
165 // FLA_Set( FLA_ZERO, Z );
166 bl1_ssetm( m_A,
167 b_alg,
168 buff_0,
169 buff_Y, rs_Y, cs_Y );
170 bl1_ssetm( m_A,
171 b_alg,
172 buff_0,
173 buff_Z, rs_Z, cs_Z );
174
175 for ( i = 0; i < b_alg; ++i )
176 {
177 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
178 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
179 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
182 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
183 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
184
185 float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
186 float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
187 float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
188
189 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
190 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
191 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
192
193 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
194 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
195
196 float* d0 = buff_d + (0 )*inc_d;
197
198 float* e0 = buff_e + (0 )*inc_e;
199
200 float* f0 = buff_f + (0 )*inc_f;
201
202 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
203
204 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
205 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
206
207 float* ABL = a10t;
208 float* ZBL = z10t;
209
210 float* a2 = alpha11;
211
212 int m_ahead = m_A - i - 1;
213 int n_ahead = m_A - i - 1;
214 int m_behind = i;
215 int n_behind = i;
216
217 /*------------------------------------------------------------*/
218
219 if ( m_behind > 0 )
220 {
221 // FLA_Copy( a10t_r, last_elem );
222 // FLA_Set( FLA_ONE, a10t_r );
223 last_elem = *a10t_r;
224 *a10t_r = *buff_1;
225 }
226
227 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
228 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
231 m_ahead + 1,
232 n_behind,
233 buff_m1,
234 ABL, rs_A, cs_A,
235 y10t, cs_Y,
236 buff_1,
237 a2, rs_A );
240 m_ahead + 1,
241 n_behind,
242 buff_m1,
243 ZBL, rs_Z, cs_Z,
244 a10t, cs_A,
245 buff_1,
246 a2, rs_A );
247
248 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
249 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
252 m_ahead,
253 n_behind,
254 buff_m1,
255 Y20, rs_Y, cs_Y,
256 a10t, cs_A,
257 buff_1,
258 a12t, cs_A );
261 m_ahead,
262 n_behind,
263 buff_m1,
264 A20, rs_A, cs_A,
265 z10t, cs_Z,
266 buff_1,
267 a12t, cs_A );
268
269 if ( m_behind > 0 )
270 {
271 // FLA_Copy( last_elem, a10t_r );
272 *a10t_r = last_elem;
273 }
274
275 if ( m_ahead > 0 )
276 {
277 // FLA_Househ2_UT( FLA_LEFT,
278 // a21_t,
279 // a21_b, tau11 );
281 a21_t,
282 a21_b, rs_A,
283 tau11 );
284
285 // FLA_Set( FLA_ONE, inv_tau11 );
286 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
287 // FLA_Copy( inv_tau11, minus_inv_tau11 );
288 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
291
292 // FLA_Copy( a21_t, first_elem );
293 // FLA_Set( FLA_ONE, a21_t );
294 first_elem = *a21_t;
295 *a21_t = *buff_1;
296
297 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
300 m_ahead,
301 n_ahead,
302 buff_1,
303 A22, rs_A, cs_A,
304 a21, rs_A,
305 buff_0,
306 y21, rs_Y );
307
308 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
311 m_ahead,
312 n_ahead,
313 buff_1,
314 A22, rs_A, cs_A,
315 a21, rs_A,
316 buff_0,
317 z21, rs_Z );
318
319 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
320 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
321 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
324 m_ahead,
325 n_behind,
326 buff_1,
327 A20, rs_A, cs_A,
328 a21, rs_A,
329 buff_0,
330 d0, inc_d );
333 m_ahead,
334 n_behind,
335 buff_1,
336 Y20, rs_Y, cs_Y,
337 a21, rs_A,
338 buff_0,
339 e0, inc_e );
342 m_ahead,
343 n_behind,
344 buff_1,
345 Z20, rs_Z, cs_Z,
346 a21, rs_A,
347 buff_0,
348 f0, inc_f );
349
350 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
351 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
354 m_ahead,
355 n_behind,
356 buff_m1,
357 Y20, rs_Y, cs_Y,
358 d0, inc_d,
359 buff_1,
360 y21, rs_Y );
363 m_ahead,
364 n_behind,
365 buff_m1,
366 A20, rs_A, cs_A,
367 f0, inc_f,
368 buff_1,
369 y21, rs_Y );
370
371 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
372 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
375 m_ahead,
376 n_behind,
377 buff_m1,
378 A20, rs_A, cs_A,
379 e0, inc_e,
380 buff_1,
381 z21, rs_Z );
384 m_ahead,
385 n_behind,
386 buff_m1,
387 Z20, rs_Z, cs_Z,
388 d0, inc_d,
389 buff_1,
390 z21, rs_Z );
391
392 // FLA_Copy( d0, t01 );
394 n_behind,
395 d0, inc_d,
396 t01, rs_T );
397
398 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
399 // FLA_Inv_scal( FLA_TWO, beta );
400 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
402 m_ahead,
403 a21, rs_A,
404 z21, rs_Z,
405 &beta );
408
409 // FLA_Scal( minus_inv_tau11, conj_beta );
410 // FLA_Axpy( conj_beta, a21, y21 );
411 // FLA_Scal( inv_tau11, y21 );
414 m_ahead,
415 &conj_beta,
416 a21, rs_A,
417 y21, rs_Y );
419 m_ahead,
420 &inv_tau11,
421 y21, rs_Y );
422
423 // FLA_Scal( minus_inv_tau11, beta );
424 // FLA_Axpy( beta, a21, z21 );
425 // FLA_Scal( inv_tau11, z21 );
428 m_ahead,
429 &beta,
430 a21, rs_A,
431 z21, rs_Z );
433 m_ahead,
434 &inv_tau11,
435 z21, rs_Z );
436
437 // FLA_Dot( a12t, a21, dot_product );
438 // FLA_Scal( minus_inv_tau11, dot_product );
439 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
441 m_ahead,
442 a12t, cs_A,
443 a21, rs_A,
444 &dot_product );
447 m_ahead,
449 a21, rs_A,
450 a12t, cs_A );
451
452 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
453 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
456 m_behind,
457 n_ahead,
458 buff_1,
459 A02, rs_A, cs_A,
460 a21, rs_A,
461 buff_0,
462 e0, inc_e );
465 m_behind,
466 n_ahead,
468 e0, inc_e,
469 a21, rs_A,
470 A02, rs_A, cs_A );
471
472 // FLA_Copy( first_elem, a21_t );
473 *a21_t = first_elem;
474 }
475
476 /*------------------------------------------------------------*/
477
478 }
479
480 // FLA_Obj_free( &d );
481 // FLA_Obj_free( &e );
482 // FLA_Obj_free( &f );
483 FLA_free( buff_d );
484 FLA_free( buff_e );
485 FLA_free( buff_f );
486
487 return FLA_SUCCESS;
488}

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_ops_var5()

FLA_Error FLA_Hess_UT_step_ops_var5 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_U,
int  rs_U,
int  cs_U,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T 
)
138{
139 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
140 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
141 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
142 int i;
143
144 // b_alg = FLA_Obj_length( T );
145 int b_alg = m_T;
146
147 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
148 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149 int inc_w = 1;
150
151 // FLA_Set( FLA_ZERO, U );
152 // FLA_Set( FLA_ZERO, Z );
153 bl1_ssetm( m_A,
154 b_alg,
155 buff_0,
156 buff_U, rs_U, cs_U );
157 bl1_ssetm( m_A,
158 b_alg,
159 buff_0,
160 buff_Z, rs_Z, cs_Z );
161
162 for ( i = 0; i < b_alg; ++i )
163 {
164 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
165 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170
171 float* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
172 float* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
173 float* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
174 float* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
175
176 float* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
177 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
178 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
179 float* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
180 float* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
181 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
182
183 float* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
184 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
185 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
186
187 float* w0 = buff_w + (0 )*inc_w;
188
189 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
190 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
191
192 float* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
193
194 int m_ahead = m_A - i - 1;
195 int n_ahead = m_A - i - 1;
196 int m_behind = i;
197 int n_behind = i;
198
199 /*------------------------------------------------------------*/
200
201 if ( m_behind > 0 )
202 {
203 // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
204 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
205 // T00, w0 );
207 m_behind,
208 u10t, cs_U,
209 w0, inc_w );
213 m_behind,
214 T00, rs_T, cs_T,
215 w0, inc_w );
216
217 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
218 // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
219 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
222 m_behind,
223 n_behind,
224 buff_m1,
225 Z00, rs_Z, cs_Z,
226 w0, inc_w,
227 buff_1,
228 a01, rs_A );
230 m_behind,
231 buff_m1,
232 z10t, cs_Z,
233 w0, inc_w,
234 buff_1,
235 alpha11 );
238 m_ahead,
239 n_behind,
240 buff_m1,
241 Z20, rs_Z, cs_Z,
242 w0, inc_w,
243 buff_1,
244 a21, rs_A );
245
246 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
247 // FLA_ONE, U00, a01, FLA_ZERO, w0 );
248 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
249 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
251 m_behind,
252 a01, rs_A,
253 w0, inc_w );
257 m_behind,
258 U00, rs_U, cs_U,
259 w0, inc_w );
261 m_behind,
262 alpha11,
263 u10t, cs_U,
264 w0, inc_w );
267 m_ahead,
268 n_behind,
269 buff_1,
270 U20, rs_U, cs_U,
271 a21, rs_A,
272 buff_1,
273 w0, inc_w );
274
275 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
276 // T00, w0 );
280 m_behind,
281 T00, rs_T, cs_T,
282 w0, inc_w );
283
284 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
285 // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
286 // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
287 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
291 m_behind,
292 buff_m1,
293 U00, rs_U, cs_U,
294 w0, inc_w,
295 buff_1,
296 a01, rs_A );
298 m_behind,
299 buff_m1,
300 u10t, cs_U,
301 w0, inc_w,
302 buff_1,
303 alpha11 );
306 m_ahead,
307 n_behind,
308 buff_m1,
309 U20, rs_U, cs_U,
310 w0, inc_w,
311 buff_1,
312 a21, rs_A );
313 }
314
315 if ( m_ahead > 0 )
316 {
317 // FLA_Househ2_UT( FLA_LEFT,
318 // a21_t,
319 // a21_b, tau11 );
321 a21_t,
322 a21_b, rs_A,
323 tau11 );
324
325 // FLA_Copy( a21, u21 );
327 m_ahead,
328 a21, rs_A,
329 u21, rs_U );
330
331 // FLA_Set( FLA_ONE, u21_t );
332 *u21_t = *buff_1;
333
334 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
335 // FLA_Dot( a12t, u21, zeta11 );
336 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
339 m_behind,
340 n_ahead,
341 buff_1,
342 A02, rs_A, cs_A,
343 u21, rs_U,
344 buff_0,
345 z01, rs_Z );
347 m_ahead,
348 a12t, cs_A,
349 u21, rs_U,
350 zeta11 );
353 m_ahead,
354 n_ahead,
355 buff_1,
356 A22, rs_A, cs_A,
357 u21, rs_U,
358 buff_0,
359 z21, rs_Z );
360
361 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
364 m_ahead,
365 n_behind,
366 buff_1,
367 U20, rs_U, cs_U,
368 u21, rs_U,
369 buff_0,
370 t01, rs_T );
371 }
372
373 /*------------------------------------------------------------*/
374
375 }
376
377 // FLA_Obj_free( &w );
378 FLA_free( buff_w );
379
380 return FLA_SUCCESS;
381}
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition bl1_dots.c:13
void bl1_strmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition bl1_trmv.c:13
void bl1_strmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_trmvsx.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition bl1_trsv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sdots(), bl1_sgemv(), bl1_ssetm(), bl1_strmv(), bl1_strmvsx(), bl1_strsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_opt_var1()

FLA_Error FLA_Hess_UT_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_opd_var1(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var1.c:190
FLA_Error FLA_Hess_UT_step_opz_var1(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var1.c:376
FLA_Error FLA_Hess_UT_step_opc_var1(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var1.c:283
FLA_Error FLA_Hess_UT_step_ops_var1(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var1.c:97

References FLA_Hess_UT_step_opc_var1(), FLA_Hess_UT_step_opd_var1(), FLA_Hess_UT_step_ops_var1(), FLA_Hess_UT_step_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var1(), and FLA_Hess_UT_opt_var1().

◆ FLA_Hess_UT_step_opt_var2()

FLA_Error FLA_Hess_UT_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_opc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:535
FLA_Error FLA_Hess_UT_step_opd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:316
FLA_Error FLA_Hess_UT_step_opz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:754
FLA_Error FLA_Hess_UT_step_ops_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var2.c:97

References FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var2(), and FLA_Hess_UT_opt_var2().

◆ FLA_Hess_UT_step_opt_var3()

FLA_Error FLA_Hess_UT_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Hess_UT_step_opz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:1156
FLA_Error FLA_Hess_UT_step_opd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:450
FLA_Error FLA_Hess_UT_step_opc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:803
FLA_Error FLA_Hess_UT_step_ops_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var3.c:97

References FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var3(), and FLA_Hess_UT_opt_var3().

◆ FLA_Hess_UT_step_opt_var4()

FLA_Error FLA_Hess_UT_step_opt_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Datatype datatype;
32 int m_A, m_T;
33 int rs_A, cs_A;
34 int rs_Y, cs_Y;
35 int rs_Z, cs_Z;
36 int rs_T, cs_T;
37
38 datatype = FLA_Obj_datatype( A );
39
40 m_A = FLA_Obj_length( A );
41 m_T = FLA_Obj_length( T );
42
45
48
51
54
55
56 switch ( datatype )
57 {
58 case FLA_FLOAT:
59 {
60 float* buff_A = FLA_FLOAT_PTR( A );
61 float* buff_Y = FLA_FLOAT_PTR( Y );
62 float* buff_Z = FLA_FLOAT_PTR( Z );
63 float* buff_T = FLA_FLOAT_PTR( T );
64
66 m_T,
70 buff_T, rs_T, cs_T );
71
72 break;
73 }
74
75 case FLA_DOUBLE:
76 {
77 double* buff_A = FLA_DOUBLE_PTR( A );
78 double* buff_Y = FLA_DOUBLE_PTR( Y );
79 double* buff_Z = FLA_DOUBLE_PTR( Z );
80 double* buff_T = FLA_DOUBLE_PTR( T );
81
83 m_T,
87 buff_T, rs_T, cs_T );
88
89 break;
90 }
91
92 case FLA_COMPLEX:
93 {
98
100 m_T,
101 buff_A, rs_A, cs_A,
102 buff_Y, rs_Y, cs_Y,
103 buff_Z, rs_Z, cs_Z,
104 buff_T, rs_T, cs_T );
105
106 break;
107 }
108
110 {
115
117 m_T,
118 buff_A, rs_A, cs_A,
119 buff_Y, rs_Y, cs_Y,
120 buff_Z, rs_Z, cs_Z,
121 buff_T, rs_T, cs_T );
122
123 break;
124 }
125 }
126
127 return FLA_SUCCESS;
128}
FLA_Error FLA_Hess_UT_step_opc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:852
FLA_Error FLA_Hess_UT_step_opz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:1212
FLA_Error FLA_Hess_UT_step_ops_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:132
FLA_Error FLA_Hess_UT_step_opd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var4.c:492

References FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var4(), and FLA_Hess_UT_opt_var4().

◆ FLA_Hess_UT_step_opt_var5()

FLA_Error FLA_Hess_UT_step_opt_var5 ( FLA_Obj  A,
FLA_Obj  U,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Datatype datatype;
32 int m_A, m_T;
33 int rs_A, cs_A;
34 int rs_U, cs_U;
35 int rs_Z, cs_Z;
36 int rs_T, cs_T;
37
38 datatype = FLA_Obj_datatype( A );
39
40 m_A = FLA_Obj_length( A );
41 m_T = FLA_Obj_length( T );
42
45
48
51
54
55
56 switch ( datatype )
57 {
58 case FLA_FLOAT:
59 {
60 float* buff_A = FLA_FLOAT_PTR( A );
61 float* buff_U = FLA_FLOAT_PTR( U );
62 float* buff_Z = FLA_FLOAT_PTR( Z );
63 float* buff_T = FLA_FLOAT_PTR( T );
64
66 m_T,
70 buff_T, rs_T, cs_T );
71
72 break;
73 }
74
75 case FLA_DOUBLE:
76 {
77 double* buff_A = FLA_DOUBLE_PTR( A );
78 double* buff_U = FLA_DOUBLE_PTR( U );
79 double* buff_Z = FLA_DOUBLE_PTR( Z );
80 double* buff_T = FLA_DOUBLE_PTR( T );
81
83 m_T,
87 buff_T, rs_T, cs_T );
88
89 break;
90 }
91
92 case FLA_COMPLEX:
93 {
98
100 m_T,
101 buff_A, rs_A, cs_A,
102 buff_U, rs_U, cs_U,
103 buff_Z, rs_Z, cs_Z,
104 buff_T, rs_T, cs_T );
105
106 break;
107 }
108
110 {
115
117 m_T,
118 buff_A, rs_A, cs_A,
119 buff_U, rs_U, cs_U,
120 buff_Z, rs_Z, cs_Z,
121 buff_T, rs_T, cs_T );
122
123 break;
124 }
125 }
126
127 return FLA_SUCCESS;
128}
FLA_Error FLA_Hess_UT_step_opz_var5(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var5.c:891
FLA_Error FLA_Hess_UT_step_opd_var5(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var5.c:385
FLA_Error FLA_Hess_UT_step_opc_var5(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var5.c:638
FLA_Error FLA_Hess_UT_step_ops_var5(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Hess_UT_opt_var5.c:132

References FLA_Hess_UT_step_opc_var5(), FLA_Hess_UT_step_opd_var5(), FLA_Hess_UT_step_ops_var5(), FLA_Hess_UT_step_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and i.

Referenced by FLA_Hess_UT_blk_var5(), and FLA_Hess_UT_opt_var5().

◆ FLA_Hess_UT_step_opz_var1()

FLA_Error FLA_Hess_UT_step_opz_var1 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
380{
383
385 int i;
386
387 // b_alg = FLA_Obj_length( T );
388 int b_alg = m_T;
389
390 for ( i = 0; i < b_alg; ++i )
391 {
392 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
393 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
394
395 dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
396 dcomplex* a21_b = buff_A + (i )*cs_A + (i+2)*rs_A;
397
398 dcomplex* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
399 dcomplex* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;
400
401 dcomplex* A2_l = buff_A + (i+1)*cs_A + (0 )*rs_A;
402 dcomplex* A2_r = buff_A + (i+2)*cs_A + (0 )*rs_A;
403
404 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
405 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
406
407 int m_ahead = m_A - i - 1;
408 int n_ahead = m_A - i - 1;
409 int n_behind = i;
410
411 /*------------------------------------------------------------*/
412
413 if ( m_ahead > 0 )
414 {
415 // FLA_Househ2_UT( FLA_LEFT,
416 // a21_t,
417 // a21_b, tau11 );
419 a21_t,
420 a21_b, rs_A,
421 tau11 );
422
423 // FLA_Copy( a21_t, first_elem );
424 // FLA_Set( FLA_ONE, a21_t );
425 first_elem = *a21_t;
426 *a21_t = *buff_1;
427
428 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
429 // A22_b );
431 n_ahead,
432 tau11,
433 a21_b, rs_A,
434 A22_t, cs_A,
435 A22_b, rs_A, cs_A );
436
437 // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
439 n_ahead - 1,
440 tau11,
441 a21_b, rs_A,
442 A2_l, rs_A,
443 A2_r, rs_A, cs_A );
444
445 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
448 m_ahead,
449 n_behind,
450 buff_1,
451 A20, rs_A, cs_A,
452 a21, rs_A,
453 buff_0,
454 t01, rs_T );
455
456 // FLA_Copy( first_elem, a21_t );
457 *a21_t = first_elem;
458 }
459
460 /*------------------------------------------------------------*/
461
462 }
463
464 return FLA_SUCCESS;
465}
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:343
FLA_Error FLA_Apply_H2_UT_r_opz_var1(int n_u2h_A2, int m_a1, dcomplex *tau, dcomplex *u2h, int inc_u2h, dcomplex *a1, int inc_a1, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:327

References bl1_zgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_opz_var2()

FLA_Error FLA_Hess_UT_step_opz_var2 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
758{
763
769 int i;
770
771 // b_alg = FLA_Obj_length( T );
772 int b_alg = m_T;
773
774 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
775 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
776 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
777 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
778 int inc_y = 1;
779 int inc_z = 1;
780
781 for ( i = 0; i < b_alg; ++i )
782 {
783 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
784 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
785 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
786 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
787 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
788
789 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
790 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
791
792 dcomplex* y0 = buff_y + (0 )*inc_y;
793 dcomplex* y2 = buff_y + (i+1)*inc_y;
794
795 dcomplex* z2 = buff_z + (i+1)*inc_z;
796
797 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
798 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
799
800 int m_ahead = m_A - i - 1;
801 int n_ahead = m_A - i - 1;
802 int m_behind = i;
803 int n_behind = i;
804
805 /*------------------------------------------------------------*/
806
807 if ( m_ahead > 0 )
808 {
809 // FLA_Househ2_UT( FLA_LEFT,
810 // a21_t,
811 // a21_b, tau11 );
813 a21_t,
814 a21_b, rs_A,
815 tau11 );
816
817 // FLA_Set( FLA_ONE, inv_tau11 );
818 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
819 // FLA_Copy( inv_tau11, minus_inv_tau11 );
820 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
823
824 // FLA_Copy( a21_t, first_elem );
825 // FLA_Set( FLA_ONE, a21_t );
826 first_elem = *a21_t;
827 *a21_t = *buff_1;
828
829 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
832 m_ahead,
833 n_ahead,
834 buff_1,
835 A22, rs_A, cs_A,
836 a21, rs_A,
837 buff_0,
838 y2, inc_y );
839
840 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
843 m_ahead,
844 n_ahead,
845 buff_1,
846 A22, rs_A, cs_A,
847 a21, rs_A,
848 buff_0,
849 z2, inc_z );
850
851 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
852 // FLA_Inv_scal( FLA_TWO, beta );
853 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
855 m_ahead,
856 a21, rs_A,
857 z2, inc_z,
858 &beta );
861
862 // FLA_Scal( minus_inv_tau11, conj_beta );
863 // FLA_Axpy( conj_beta, a21, y2 );
864 // FLA_Scal( inv_tau11, y2 );
867 m_ahead,
868 &conj_beta,
869 a21, rs_A,
870 y2, inc_y );
872 m_ahead,
873 &inv_tau11,
874 y2, inc_y );
875
876 // FLA_Scal( minus_inv_tau11, beta );
877 // FLA_Axpy( beta, a21, z2 );
878 // FLA_Scal( inv_tau11, z2 );
881 m_ahead,
882 &beta,
883 a21, rs_A,
884 z2, inc_z );
886 m_ahead,
887 &inv_tau11,
888 z2, inc_z );
889
890 // FLA_Dot( a12t, a21, dot_product );
891 // FLA_Scal( minus_inv_tau11, dot_product );
892 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
894 m_ahead,
895 a12t, cs_A,
896 a21, rs_A,
897 &dot_product );
900 m_ahead,
902 a21, rs_A,
903 a12t, cs_A );
904
905 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
906 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
909 m_behind,
910 n_ahead,
911 buff_1,
912 A02, rs_A, cs_A,
913 a21, rs_A,
914 buff_0,
915 y0, inc_y );
918 m_behind,
919 n_ahead,
921 y0, inc_y,
922 a21, rs_A,
923 A02, rs_A, cs_A );
924
925 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
926 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
929 m_ahead,
930 n_ahead,
931 buff_m1,
932 a21, rs_A,
933 y2, inc_y,
934 A22, rs_A, cs_A );
937 m_ahead,
938 n_ahead,
939 buff_m1,
940 z2, inc_z,
941 a21, rs_A,
942 A22, rs_A, cs_A );
943
944 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
947 m_ahead,
948 n_behind,
949 buff_1,
950 A20, rs_A, cs_A,
951 a21, rs_A,
952 buff_0,
953 t01, rs_T );
954
955 // FLA_Copy( first_elem, a21_t );
956 *a21_t = first_elem;
957 }
958
959 /*------------------------------------------------------------*/
960
961 }
962
963 // FLA_Obj_free( &y );
964 // FLA_Obj_free( &z );
965 FLA_free( buff_y );
966 FLA_free( buff_z );
967
968 return FLA_SUCCESS;
969}

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opz_var3()

FLA_Error FLA_Hess_UT_step_opz_var3 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
1160{
1165
1174 int i;
1175
1176 // b_alg = FLA_Obj_length( T );
1177 int b_alg = m_T;
1178
1179 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1180 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
1181 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1182 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
1183 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1184 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1185 dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1186 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1187 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1188 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1189 int inc_u = 1;
1190 int inc_y = 1;
1191 int inc_z = 1;
1192 int inc_v = 1;
1193 int inc_w = 1;
1194
1195 // Initialize some variables (only to prevent compiler warnings).
1196 first_elem = *buff_0;
1198
1199 for ( i = 0; i < b_alg; ++i )
1200 {
1201 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1202 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1203 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1204 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1205 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1206 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1207
1208 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1209 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1210
1211 dcomplex* upsilon1 = buff_u + (i )*inc_u;
1212 dcomplex* u2 = buff_u + (i+1)*inc_u;
1213
1214 dcomplex* y0 = buff_y + (0 )*inc_y;
1215 dcomplex* psi1 = buff_y + (i )*inc_y;
1216 dcomplex* y2 = buff_y + (i+1)*inc_y;
1217
1218 dcomplex* zeta1 = buff_z + (i )*inc_z;
1219 dcomplex* z2 = buff_z + (i+1)*inc_z;
1220
1221 dcomplex* v2 = buff_v + (i+1)*inc_v;
1222
1223 dcomplex* w2 = buff_w + (i+1)*inc_w;
1224
1225 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1226 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1227
1228 int m_ahead = m_A - i - 1;
1229 int n_ahead = m_A - i - 1;
1230 int m_behind = i;
1231 int n_behind = i;
1232
1233 /*------------------------------------------------------------*/
1234
1235 if ( m_behind > 0 )
1236 {
1237 // FLA_Copy( upsilon1, minus_upsilon1 );
1238 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
1239 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
1242
1243 // FLA_Copy( psi1, minus_psi1 );
1244 // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
1245 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
1248
1249 // FLA_Copy( zeta1, minus_zeta1 );
1250 // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
1252
1253 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
1254 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
1256 1,
1258 psi1, 1,
1259 alpha11, 1 );
1261 1,
1262 &minus_zeta1,
1263 upsilon1, 1,
1264 alpha11, 1 );
1265
1266 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
1267 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
1269 m_ahead,
1271 y2, inc_y,
1272 a12t, cs_A );
1274 m_ahead,
1275 &minus_zeta1,
1276 u2, inc_u,
1277 a12t, cs_A );
1278
1279 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
1280 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
1282 m_ahead,
1284 u2, inc_u,
1285 a21, rs_A );
1287 m_ahead,
1289 z2, inc_z,
1290 a21, rs_A );
1291 }
1292
1293 if ( m_ahead > 0 )
1294 {
1295 // FLA_Househ2_UT( FLA_LEFT,
1296 // a21_t,
1297 // a21_b, tau11 );
1299 a21_t,
1300 a21_b, rs_A,
1301 tau11 );
1302
1303 // FLA_Set( FLA_ONE, inv_tau11 );
1304 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1305 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1306 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1309
1310 // FLA_Copy( a21_t, first_elem );
1311 // FLA_Set( FLA_ONE, a21_t );
1312 first_elem = *a21_t;
1313 *a21_t = *buff_1;
1314 }
1315
1316 if ( m_behind > 0 )
1317 {
1318 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1319 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1322 m_ahead,
1323 n_ahead,
1324 buff_m1,
1325 u2, inc_u,
1326 y2, inc_y,
1327 A22, rs_A, cs_A );
1330 m_ahead,
1331 n_ahead,
1332 buff_m1,
1333 z2, inc_z,
1334 u2, inc_u,
1335 A22, rs_A, cs_A );
1336 }
1337
1338 if ( m_ahead > 0 )
1339 {
1340 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1343 m_ahead,
1344 n_ahead,
1345 buff_1,
1346 A22, rs_A, cs_A,
1347 a21, rs_A,
1348 buff_0,
1349 v2, inc_v );
1350
1351 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1354 m_ahead,
1355 n_ahead,
1356 buff_1,
1357 A22, rs_A, cs_A,
1358 a21, rs_A,
1359 buff_0,
1360 w2, inc_w );
1361
1362 // FLA_Copy( a21, u2 );
1363 // FLA_Copy( v2, y2 );
1364 // FLA_Copy( w2, z2 );
1366 m_ahead,
1367 a21, rs_A,
1368 u2, inc_u );
1370 m_ahead,
1371 v2, inc_v,
1372 y2, inc_y );
1374 m_ahead,
1375 w2, inc_w,
1376 z2, inc_z );
1377
1378 // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1379 // FLA_Inv_scal( FLA_TWO, beta );
1380 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1382 m_ahead,
1383 a21, rs_A,
1384 z2, inc_z,
1385 &beta );
1388
1389 // FLA_Scal( minus_inv_tau11, conj_beta );
1390 // FLA_Axpy( conj_beta, a21, y2 );
1391 // FLA_Scal( inv_tau11, y2 );
1394 m_ahead,
1395 &conj_beta,
1396 a21, rs_A,
1397 y2, inc_y );
1399 m_ahead,
1400 &inv_tau11,
1401 y2, inc_y );
1402
1403 // FLA_Scal( minus_inv_tau11, beta );
1404 // FLA_Axpy( beta, a21, z2 );
1405 // FLA_Scal( inv_tau11, z2 );
1408 m_ahead,
1409 &beta,
1410 a21, rs_A,
1411 z2, inc_z );
1413 m_ahead,
1414 &inv_tau11,
1415 z2, inc_z );
1416
1417 // FLA_Dot( a12t, a21, dot_product );
1418 // FLA_Scal( minus_inv_tau11, dot_product );
1419 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1421 m_ahead,
1422 a12t, cs_A,
1423 a21, rs_A,
1424 &dot_product );
1427 m_ahead,
1428 &dot_product,
1429 a21, rs_A,
1430 a12t, cs_A );
1431
1432 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1433 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1436 m_behind,
1437 n_ahead,
1438 buff_1,
1439 A02, rs_A, cs_A,
1440 a21, rs_A,
1441 buff_0,
1442 y0, inc_y );
1445 m_behind,
1446 n_ahead,
1448 y0, inc_y,
1449 a21, rs_A,
1450 A02, rs_A, cs_A );
1451
1452 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1455 m_ahead,
1456 n_behind,
1457 buff_1,
1458 A20, rs_A, cs_A,
1459 a21, rs_A,
1460 buff_0,
1461 t01, rs_T );
1462
1463 // FLA_Copy( first_elem, a21_t );
1464 *a21_t = first_elem;
1465 }
1466
1467 if ( m_behind + 1 == b_alg && m_ahead > 0 )
1468 {
1469 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1470 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1473 m_ahead,
1474 n_ahead,
1475 buff_m1,
1476 u2, inc_u,
1477 y2, inc_y,
1478 A22, rs_A, cs_A );
1481 m_ahead,
1482 n_ahead,
1483 buff_m1,
1484 z2, inc_z,
1485 u2, inc_u,
1486 A22, rs_A, cs_A );
1487 }
1488
1489 /*------------------------------------------------------------*/
1490
1491 }
1492
1493 // FLA_Obj_free( &u );
1494 // FLA_Obj_free( &y );
1495 // FLA_Obj_free( &z );
1496 // FLA_Obj_free( &v );
1497 // FLA_Obj_free( &w );
1498 FLA_free( buff_u );
1499 FLA_free( buff_y );
1500 FLA_free( buff_z );
1501 FLA_free( buff_v );
1502 FLA_free( buff_w );
1503
1504 return FLA_SUCCESS;
1505}

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opz_var4()

FLA_Error FLA_Hess_UT_step_opz_var4 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
1218{
1223
1229 int i;
1230
1231 // b_alg = FLA_Obj_length( T );
1232 int b_alg = m_T;
1233
1234 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
1235 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
1236 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1237 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1238 dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1239 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1240 int inc_d = 1;
1241 int inc_e = 1;
1242 int inc_f = 1;
1243
1244 // FLA_Set( FLA_ZERO, Y );
1245 // FLA_Set( FLA_ZERO, Z );
1246 bl1_zsetm( m_A,
1247 b_alg,
1248 buff_0,
1249 buff_Y, rs_Y, cs_Y );
1250 bl1_zsetm( m_A,
1251 b_alg,
1252 buff_0,
1253 buff_Z, rs_Z, cs_Z );
1254
1255 for ( i = 0; i < b_alg; ++i )
1256 {
1257 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1258 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1259 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1260 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1261 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1262 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1263 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1264
1265 dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1266 dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1267 dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1268
1269 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1270 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1271 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1272
1273 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1274 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1275
1276 dcomplex* d0 = buff_d + (0 )*inc_d;
1277
1278 dcomplex* e0 = buff_e + (0 )*inc_e;
1279
1280 dcomplex* f0 = buff_f + (0 )*inc_f;
1281
1282 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
1283
1284 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1285 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1286
1287 dcomplex* ABL = a10t;
1288 dcomplex* ZBL = z10t;
1289
1290 dcomplex* a2 = alpha11;
1291
1292 int m_ahead = m_A - i - 1;
1293 int n_ahead = m_A - i - 1;
1294 int m_behind = i;
1295 int n_behind = i;
1296
1297 /*------------------------------------------------------------*/
1298
1299 if ( m_behind > 0 )
1300 {
1301 // FLA_Copy( a10t_r, last_elem );
1302 // FLA_Set( FLA_ONE, a10t_r );
1303 last_elem = *a10t_r;
1304 *a10t_r = *buff_1;
1305 }
1306
1307 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1308 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
1311 m_ahead + 1,
1312 n_behind,
1313 buff_m1,
1314 ABL, rs_A, cs_A,
1315 y10t, cs_Y,
1316 buff_1,
1317 a2, rs_A );
1320 m_ahead + 1,
1321 n_behind,
1322 buff_m1,
1323 ZBL, rs_Z, cs_Z,
1324 a10t, cs_A,
1325 buff_1,
1326 a2, rs_A );
1327
1328 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1329 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
1332 m_ahead,
1333 n_behind,
1334 buff_m1,
1335 Y20, rs_Y, cs_Y,
1336 a10t, cs_A,
1337 buff_1,
1338 a12t, cs_A );
1341 m_ahead,
1342 n_behind,
1343 buff_m1,
1344 A20, rs_A, cs_A,
1345 z10t, cs_Z,
1346 buff_1,
1347 a12t, cs_A );
1348
1349 if ( m_behind > 0 )
1350 {
1351 // FLA_Copy( last_elem, a10t_r );
1352 *a10t_r = last_elem;
1353 }
1354
1355 if ( m_ahead > 0 )
1356 {
1357 // FLA_Househ2_UT( FLA_LEFT,
1358 // a21_t,
1359 // a21_b, tau11 );
1361 a21_t,
1362 a21_b, rs_A,
1363 tau11 );
1364
1365 // FLA_Set( FLA_ONE, inv_tau11 );
1366 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1367 // FLA_Copy( inv_tau11, minus_inv_tau11 );
1368 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1371
1372 // FLA_Copy( a21_t, first_elem );
1373 // FLA_Set( FLA_ONE, a21_t );
1374 first_elem = *a21_t;
1375 *a21_t = *buff_1;
1376
1377 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1380 m_ahead,
1381 n_ahead,
1382 buff_1,
1383 A22, rs_A, cs_A,
1384 a21, rs_A,
1385 buff_0,
1386 y21, rs_Y );
1387
1388 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1391 m_ahead,
1392 n_ahead,
1393 buff_1,
1394 A22, rs_A, cs_A,
1395 a21, rs_A,
1396 buff_0,
1397 z21, rs_Z );
1398
1399 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1400 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1401 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1404 m_ahead,
1405 n_behind,
1406 buff_1,
1407 A20, rs_A, cs_A,
1408 a21, rs_A,
1409 buff_0,
1410 d0, inc_d );
1413 m_ahead,
1414 n_behind,
1415 buff_1,
1416 Y20, rs_Y, cs_Y,
1417 a21, rs_A,
1418 buff_0,
1419 e0, inc_e );
1422 m_ahead,
1423 n_behind,
1424 buff_1,
1425 Z20, rs_Z, cs_Z,
1426 a21, rs_A,
1427 buff_0,
1428 f0, inc_f );
1429
1430 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1431 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1434 m_ahead,
1435 n_behind,
1436 buff_m1,
1437 Y20, rs_Y, cs_Y,
1438 d0, inc_d,
1439 buff_1,
1440 y21, rs_Y );
1443 m_ahead,
1444 n_behind,
1445 buff_m1,
1446 A20, rs_A, cs_A,
1447 f0, inc_f,
1448 buff_1,
1449 y21, rs_Y );
1450
1451 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1452 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1455 m_ahead,
1456 n_behind,
1457 buff_m1,
1458 A20, rs_A, cs_A,
1459 e0, inc_e,
1460 buff_1,
1461 z21, rs_Z );
1464 m_ahead,
1465 n_behind,
1466 buff_m1,
1467 Z20, rs_Z, cs_Z,
1468 d0, inc_d,
1469 buff_1,
1470 z21, rs_Z );
1471
1472 // FLA_Copy( d0, t01 );
1474 n_behind,
1475 d0, inc_d,
1476 t01, rs_T );
1477
1478 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1479 // FLA_Inv_scal( FLA_TWO, beta );
1480 // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1482 m_ahead,
1483 a21, rs_A,
1484 z21, rs_Z,
1485 &beta );
1488
1489 // FLA_Scal( minus_inv_tau11, conj_beta );
1490 // FLA_Axpy( conj_beta, a21, y21 );
1491 // FLA_Scal( inv_tau11, y21 );
1494 m_ahead,
1495 &conj_beta,
1496 a21, rs_A,
1497 y21, rs_Y );
1499 m_ahead,
1500 &inv_tau11,
1501 y21, rs_Y );
1502
1503 // FLA_Scal( minus_inv_tau11, beta );
1504 // FLA_Axpy( beta, a21, z21 );
1505 // FLA_Scal( inv_tau11, z21 );
1508 m_ahead,
1509 &beta,
1510 a21, rs_A,
1511 z21, rs_Z );
1513 m_ahead,
1514 &inv_tau11,
1515 z21, rs_Z );
1516
1517 // FLA_Dot( a12t, a21, dot_product );
1518 // FLA_Scal( minus_inv_tau11, dot_product );
1519 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1521 m_ahead,
1522 a12t, cs_A,
1523 a21, rs_A,
1524 &dot_product );
1527 m_ahead,
1528 &dot_product,
1529 a21, rs_A,
1530 a12t, cs_A );
1531
1532 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1533 // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1536 m_behind,
1537 n_ahead,
1538 buff_1,
1539 A02, rs_A, cs_A,
1540 a21, rs_A,
1541 buff_0,
1542 e0, inc_e );
1545 m_behind,
1546 n_ahead,
1548 e0, inc_e,
1549 a21, rs_A,
1550 A02, rs_A, cs_A );
1551
1552 // FLA_Copy( first_elem, a21_t );
1553 *a21_t = first_elem;
1554 }
1555
1556 /*------------------------------------------------------------*/
1557
1558 }
1559
1560 // FLA_Obj_free( &d );
1561 // FLA_Obj_free( &e );
1562 // FLA_Obj_free( &f );
1563 FLA_free( buff_d );
1564 FLA_free( buff_e );
1565 FLA_free( buff_f );
1566
1567 return FLA_SUCCESS;
1568}

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opz_var5()

FLA_Error FLA_Hess_UT_step_opz_var5 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_U,
int  rs_U,
int  cs_U,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
897{
901 int i;
902
903 // b_alg = FLA_Obj_length( T );
904 int b_alg = m_T;
905
906 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
907 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
908 int inc_w = 1;
909
910 // FLA_Set( FLA_ZERO, U );
911 // FLA_Set( FLA_ZERO, Z );
912 bl1_zsetm( m_A,
913 b_alg,
914 buff_0,
915 buff_U, rs_U, cs_U );
916 bl1_zsetm( m_A,
917 b_alg,
918 buff_0,
919 buff_Z, rs_Z, cs_Z );
920
921 for ( i = 0; i < b_alg; ++i )
922 {
923 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
924 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
925 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
926 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
927 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
928 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
929
930 dcomplex* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
931 dcomplex* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
932 dcomplex* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
933 dcomplex* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
934
935 dcomplex* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
936 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
937 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
938 dcomplex* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
939 dcomplex* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
940 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
941
942 dcomplex* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
943 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
944 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
945
946 dcomplex* w0 = buff_w + (0 )*inc_w;
947
948 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
949 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
950
951 dcomplex* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
952
953 int m_ahead = m_A - i - 1;
954 int n_ahead = m_A - i - 1;
955 int m_behind = i;
956 int n_behind = i;
957
958 /*------------------------------------------------------------*/
959
960 if ( m_behind > 0 )
961 {
962 // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
963 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
964 // T00, w0 );
966 m_behind,
967 u10t, cs_U,
968 w0, inc_w );
972 m_behind,
973 T00, rs_T, cs_T,
974 w0, inc_w );
975
976 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
977 // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
978 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
981 m_behind,
982 n_behind,
983 buff_m1,
984 Z00, rs_Z, cs_Z,
985 w0, inc_w,
986 buff_1,
987 a01, rs_A );
989 m_behind,
990 buff_m1,
991 z10t, cs_Z,
992 w0, inc_w,
993 buff_1,
994 alpha11 );
997 m_ahead,
998 n_behind,
999 buff_m1,
1000 Z20, rs_Z, cs_Z,
1001 w0, inc_w,
1002 buff_1,
1003 a21, rs_A );
1004
1005 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
1006 // FLA_ONE, U00, a01, FLA_ZERO, w0 );
1007 // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
1008 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
1010 m_behind,
1011 a01, rs_A,
1012 w0, inc_w );
1016 m_behind,
1017 U00, rs_U, cs_U,
1018 w0, inc_w );
1020 m_behind,
1021 alpha11,
1022 u10t, cs_U,
1023 w0, inc_w );
1026 m_ahead,
1027 n_behind,
1028 buff_1,
1029 U20, rs_U, cs_U,
1030 a21, rs_A,
1031 buff_1,
1032 w0, inc_w );
1033
1034 // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
1035 // T00, w0 );
1039 m_behind,
1040 T00, rs_T, cs_T,
1041 w0, inc_w );
1042
1043 // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
1044 // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
1045 // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
1046 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
1050 m_behind,
1051 buff_m1,
1052 U00, rs_U, cs_U,
1053 w0, inc_w,
1054 buff_1,
1055 a01, rs_A );
1057 m_behind,
1058 buff_m1,
1059 u10t, cs_U,
1060 w0, inc_w,
1061 buff_1,
1062 alpha11 );
1065 m_ahead,
1066 n_behind,
1067 buff_m1,
1068 U20, rs_U, cs_U,
1069 w0, inc_w,
1070 buff_1,
1071 a21, rs_A );
1072 }
1073
1074 if ( m_ahead > 0 )
1075 {
1076 // FLA_Househ2_UT( FLA_LEFT,
1077 // a21_t,
1078 // a21_b, tau11 );
1080 a21_t,
1081 a21_b, rs_A,
1082 tau11 );
1083
1084 // FLA_Copy( a21, u21 );
1086 m_ahead,
1087 a21, rs_A,
1088 u21, rs_U );
1089
1090 // FLA_Set( FLA_ONE, u21_t );
1091 *u21_t = *buff_1;
1092
1093 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
1094 // FLA_Dot( a12t, u21, zeta11 );
1095 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
1098 m_behind,
1099 n_ahead,
1100 buff_1,
1101 A02, rs_A, cs_A,
1102 u21, rs_U,
1103 buff_0,
1104 z01, rs_Z );
1106 m_ahead,
1107 a12t, cs_A,
1108 u21, rs_U,
1109 zeta11 );
1112 m_ahead,
1113 n_ahead,
1114 buff_1,
1115 A22, rs_A, cs_A,
1116 u21, rs_U,
1117 buff_0,
1118 z21, rs_Z );
1119
1120 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
1123 m_ahead,
1124 n_behind,
1125 buff_1,
1126 U20, rs_U, cs_U,
1127 u21, rs_U,
1128 buff_0,
1129 t01, rs_T );
1130 }
1131
1132 /*------------------------------------------------------------*/
1133
1134 }
1135
1136 // FLA_Obj_free( &w );
1137 FLA_free( buff_w );
1138
1139 return FLA_SUCCESS;
1140}
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition bl1_dots.c:56
void bl1_ztrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition bl1_trmv.c:177
void bl1_ztrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_trmvsx.c:187
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition bl1_trsv.c:177

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zdots(), bl1_zgemv(), bl1_zsetm(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_unb_var1()

FLA_Error FLA_Hess_UT_step_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj AL, AR, A0, a1, A2;
24 FLA_Obj TTL, TTR, T00, t01, T02,
25 TBL, TBR, t10t, tau11, t12t,
26 T20, t21, T22;
27
29 a21_b;
30
32 A22_b;
33
35
37
40
41
44
45 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
46
47 FLA_Part_2x2( A, &ATL, &ATR,
48 &ABL, &ABR, 0, 0, FLA_TL );
49 FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
50 FLA_Part_2x2( T, &TTL, &TTR,
51 &TBL, &TBR, 0, 0, FLA_TL );
52
53 while ( FLA_Obj_length( ATL ) < b_alg )
54 {
55 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
56 /* ************* */ /* ************************** */
57 &a10t, /**/ &alpha11, &a12t,
58 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
59 1, 1, FLA_BR );
60
61 FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &a1, &A2,
62 1, FLA_RIGHT );
63
64 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
65 /* ************* */ /* ************************** */
66 &t10t, /**/ &tau11, &t12t,
67 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
68 1, 1, FLA_BR );
69
70 /*------------------------------------------------------------*/
71
72 if ( FLA_Obj_length( A22 ) > 0 )
73 {
75 &a21_b, 1, FLA_TOP );
76
78 &A22_b, 1, FLA_TOP );
79
80 FLA_Part_1x2( A2, &A2_l, &A2_r, 1, FLA_LEFT );
81
82 // [ u21, tau11, a21 ] = House( a21 );
84 a21_t,
85 a21_b, tau11 );
86
87 // Save first element of a21_t and set it to one so we can use a21 as
88 // u21 in subsequent computations. We will restore a21_t later on.
91
92 // A22 = ( I - inv( tau ) * u21 * u21' ) * A22;
94 A22_b );
95
96 // A02 = A02 * ( I - inv( tau ) * u21 * u21' );
97 // a12t = a12t * ( I - inv( tau ) * u21 * u21' );
98 // A22 = A22 * ( I - inv( tau ) * u21 * u21' );
100
101 // t01 = U20' * u21;
103
104 // Restore first element of a21.
106 }
107
108 /*------------------------------------------------------------*/
109
110 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
111 a10t, alpha11, /**/ a12t,
112 /* ************** */ /* ************************ */
113 &ABL, /**/ &ABR, A20, a21, /**/ A22,
114 FLA_TL );
115
116 FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, a1, /**/ A2,
117 FLA_LEFT );
118
119 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
120 t10t, tau11, /**/ t12t,
121 /* ************** */ /* ************************ */
122 &TBL, /**/ &TBR, T20, t21, /**/ T22,
123 FLA_TL );
124 }
125
127
128 return FLA_SUCCESS;
129}
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy.c:15
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemv.c:15
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition FLA_Apply_H2_UT.c:13
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition FLA_Househ2_UT.c:16

References FLA_Apply_H2_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Gemv(), FLA_Househ2_UT(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_ZERO, and i.

Referenced by FLA_Hess_UT_unb_var1().

◆ FLA_Hess_UT_step_unb_var2()

FLA_Error FLA_Hess_UT_step_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj TTL, TTR, T00, t01, T02,
24 TBL, TBR, t10t, tau11, t12t,
25 T20, t21, T22;
26 FLA_Obj yT, y0,
27 yB, psi1,
28 y2;
29 FLA_Obj zT, z0,
30 zB, zeta1,
31 z2;
32 FLA_Obj y, z;
33
40
42 a21_b;
43
45 dim_t m_A;
47
48
50
52 m_A = FLA_Obj_length( A );
53
54 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
56 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
57 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
58 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
59 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
60 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
61 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
62
63 FLA_Part_2x2( A, &ATL, &ATR,
64 &ABL, &ABR, 0, 0, FLA_TL );
65 FLA_Part_2x2( T, &TTL, &TTR,
66 &TBL, &TBR, 0, 0, FLA_TL );
67 FLA_Part_2x1( y, &yT,
68 &yB, 0, FLA_TOP );
69 FLA_Part_2x1( z, &zT,
70 &zB, 0, FLA_TOP );
71
72 while ( FLA_Obj_length( ATL ) < b_alg )
73 {
74 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
75 /* ************* */ /* ************************** */
76 &a10t, /**/ &alpha11, &a12t,
77 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
78 1, 1, FLA_BR );
79 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
80 /* ************* */ /* ************************** */
81 &t10t, /**/ &tau11, &t12t,
82 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
83 1, 1, FLA_BR );
85 /* ** */ /* **** */
86 &psi1,
87 yB, &y2, 1, FLA_BOTTOM );
89 /* ** */ /* ***** */
90 &zeta1,
91 zB, &z2, 1, FLA_BOTTOM );
92
93 /*------------------------------------------------------------*/
94
95 if ( FLA_Obj_length( A22 ) > 0 )
96 {
98 &a21_b, 1, FLA_TOP );
99
100 // [ u21, tau11, a21 ] = House( a21 );
102 a21_t,
103 a21_b, tau11 );
104
105 // inv_tau11 = 1 / tau11;
106 // minus_inv_tau11 = -1 / tau11;
111
112 // Save first element of a21_t and set it to one so we can use a21 as
113 // u21 in subsequent computations. We will restore a21_t later on.
116
117 // y21 = A22' * u21;
119
120 // z21 = A22 * u21;
122
123 // beta = u21' * z21 / 2;
124 // conj_beta = conj(beta);
128
129 // y21' = ( y21' - beta / tau * u21' ) / tau;
130 // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
134
135 // z21 = ( z21 - beta / tau * u21 ) / tau;
137 FLA_Axpy( beta, a21, z2 );
139
140 // a12t = a12t * ( I - u21 * u21' / tau );
141 // = a12t - ( a12t * u21 ) * u21' / tau;
145
146 // A02 = A02 * ( I - u21 * u21' / tau );
147 // = A02 - ( A02 * u21 ) * u21' / tau;
150
151 // A22 = A22 - u21 * y21' - z21 * u21';
154
155 // t01 = U20' * u21;
157
158 // Restore first element of a21.
160 }
161
162 /*------------------------------------------------------------*/
163
164 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
165 a10t, alpha11, /**/ a12t,
166 /* ************** */ /* ************************ */
167 &ABL, /**/ &ABR, A20, a21, /**/ A22,
168 FLA_TL );
169 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
170 t10t, tau11, /**/ t12t,
171 /* ************** */ /* ************************ */
172 &TBL, /**/ &TBR, T20, t21, /**/ T22,
173 FLA_TL );
175 psi1,
176 /* ** */ /* **** */
177 &yB, y2, FLA_TOP );
179 zeta1,
180 /* ** */ /* ***** */
181 &zB, z2, FLA_TOP );
182 }
183
187 FLA_Obj_free( &beta );
190 FLA_Obj_free( &y );
191 FLA_Obj_free( &z );
192
193 return FLA_SUCCESS;
194}
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Scal.c:15
FLA_Error FLA_Dot(FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition FLA_Dot.c:13
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition FLA_Dotc.c:13
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Axpy.c:15
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Axpyt.c:15
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Inv_scal.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition FLA_Copyt.c:15
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition FLA_Inv_scalc.c:13
FLA_Error FLA_Gerc(FLA_Conj conjx, FLA_Conj conjy, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition FLA_Gerc.c:13

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, i, psi1, and zeta1.

Referenced by FLA_Hess_UT_unb_var2().

◆ FLA_Hess_UT_step_unb_var3()

FLA_Error FLA_Hess_UT_step_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj TTL, TTR, T00, t01, T02,
24 TBL, TBR, t10t, tau11, t12t,
25 T20, t21, T22;
26 FLA_Obj uT, u0,
27 uB, upsilon1,
28 u2;
29 FLA_Obj yT, y0,
30 yB, psi1,
31 y2;
32 FLA_Obj zT, z0,
33 zB, zeta1,
34 z2;
35 FLA_Obj vT, v0,
36 vB, nu1,
37 v2;
38 FLA_Obj wT, w0,
39 wB, omega1,
40 w2;
41 FLA_Obj u, y, z, v, w;
42
54
56 a21_b;
57
59 dim_t m_A;
61
62
64
66 m_A = FLA_Obj_length( A );
67
68 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
70 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
71 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
72 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
73 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
76 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_psi1 );
78 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta1 );
79 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
80 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
81 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
82 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
83 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
84
85 FLA_Part_2x2( A, &ATL, &ATR,
86 &ABL, &ABR, 0, 0, FLA_TL );
87 FLA_Part_2x2( T, &TTL, &TTR,
88 &TBL, &TBR, 0, 0, FLA_TL );
89 FLA_Part_2x1( u, &uT,
90 &uB, 0, FLA_TOP );
91 FLA_Part_2x1( y, &yT,
92 &yB, 0, FLA_TOP );
93 FLA_Part_2x1( z, &zT,
94 &zB, 0, FLA_TOP );
95 FLA_Part_2x1( v, &vT,
96 &vB, 0, FLA_TOP );
97 FLA_Part_2x1( w, &wT,
98 &wB, 0, FLA_TOP );
99
100 while ( FLA_Obj_length( ATL ) < b_alg )
101 {
102 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
103 /* ************* */ /* ************************** */
104 &a10t, /**/ &alpha11, &a12t,
105 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
106 1, 1, FLA_BR );
107 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
108 /* ************* */ /* ************************** */
109 &t10t, /**/ &tau11, &t12t,
110 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
111 1, 1, FLA_BR );
113 /* ** */ /* ******** */
114 &upsilon1,
115 uB, &u2, 1, FLA_BOTTOM );
117 /* ** */ /* **** */
118 &psi1,
119 yB, &y2, 1, FLA_BOTTOM );
121 /* ** */ /* ***** */
122 &zeta1,
123 zB, &z2, 1, FLA_BOTTOM );
125 /* ** */ /* *** */
126 &nu1,
127 vB, &v2, 1, FLA_BOTTOM );
129 /* ** */ /* ****** */
130 &omega1,
131 wB, &w2, 1, FLA_BOTTOM );
132
133 /*------------------------------------------------------------*/
134
135 if ( FLA_Obj_length( ATL ) > 0 )
136 {
140
144
147
148 // alpha11 = alpha11 - upsilon11 * conj(psi11) - zeta11 * conj(upsilon11);
151
152 // a12t = a12t - upsilon11 * y21' - zeta11 * u21';
155
156 // a21 = a21 - conj(psi11) * u21 - conj(upsilon11) * z21;
159 }
160
161 if ( FLA_Obj_length( A22 ) > 0 )
162 {
164 &a21_b, 1, FLA_TOP );
165
166 // [ x21, tau11, a21 ] = House( a21 );
168 a21_t,
169 a21_b, tau11 );
170
171 // inv_tau11 = 1 / tau11;
172 // minus_inv_tau11 = -1 / tau11;
177
178 // Save first element of a21_t and set it to one so we can use a21 as
179 // u21 in subsequent computations. We will restore a21_t later on.
182 }
183
184 if ( FLA_Obj_length( ATL ) > 0 )
185 {
186 // A22 = A22 - u21 * y21' - z21 * u21';
189 }
190
191 if ( FLA_Obj_length( A22 ) > 0 )
192 {
193 // v2 = A22' * x21;
195
196 // w2 = A22 * x21;
198
199 // u21 = x21;
200 // y21 = v2;
201 // z21 = w2;
202 FLA_Copy( a21, u2 );
203 FLA_Copy( v2, y2 );
204 FLA_Copy( w2, z2 );
205
206 // beta = u21' * z21 / 2;
207 // conj_beta = conj(beta);
211
212 // y21' = ( y21' - beta / tau * u21' ) / tau;
213 // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
217
218 // z21 = ( z21 - beta / tau * u21 ) / tau;
220 FLA_Axpy( beta, a21, z2 );
222
223 // a12t = a12t * ( I - u21 * u21' / tau );
224 // = a12t - ( a12t * u21 ) * u21' / tau;
228
229 // A02 = A02 * ( I - u21 * u21' / tau );
230 // = A02 - ( A02 * u21 ) * u21' / tau;
233
234 // t01 = U20' * u21;
236
237 // Restore first element of a21.
239 }
240
241 // Update A22 if this is the last iteration; this is needed when we're
242 // being called from the blocked routine so A22 is left in a valid state.
243 if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
244 FLA_Obj_length( A22 ) > 0 )
245 {
246 // A22 = A22 - u21 * y21' - z21 * u21';
249 }
250
251 /*------------------------------------------------------------*/
252
253 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
254 a10t, alpha11, /**/ a12t,
255 /* ************** */ /* ************************ */
256 &ABL, /**/ &ABR, A20, a21, /**/ A22,
257 FLA_TL );
258 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
259 t10t, tau11, /**/ t12t,
260 /* ************** */ /* ************************ */
261 &TBL, /**/ &TBR, T20, t21, /**/ T22,
262 FLA_TL );
264 upsilon1,
265 /* ** */ /* ******** */
266 &uB, u2, FLA_TOP );
268 psi1,
269 /* ** */ /* **** */
270 &yB, y2, FLA_TOP );
272 zeta1,
273 /* ** */ /* ***** */
274 &zB, z2, FLA_TOP );
276 nu1,
277 /* ** */ /* *** */
278 &vB, v2, FLA_TOP );
280 omega1,
281 /* ** */ /* ****** */
282 &wB, w2, FLA_TOP );
283 }
284
288 FLA_Obj_free( &beta );
296 FLA_Obj_free( &u );
297 FLA_Obj_free( &y );
298 FLA_Obj_free( &z );
299 FLA_Obj_free( &v );
300 FLA_Obj_free( &w );
301
302 return FLA_SUCCESS;
303}
double *restrict omega1
Definition bl1_axpyv2bdotaxpy.c:200

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, i, omega1, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_unb_var3().

◆ FLA_Hess_UT_step_unb_var4()

FLA_Error FLA_Hess_UT_step_unb_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Obj ATL, ATR, A00, a01, A02,
33 A20, a21, A22;
34 FLA_Obj YTL, YTR, Y00, y01, Y02,
35 YBL, YBR, y10t, psi11, y12t,
36 Y20, y21, Y22;
37 FLA_Obj ZTL, ZTR, Z00, z01, Z02,
39 Z20, z21, Z22;
40 FLA_Obj TTL, TTR, T00, t01, T02,
41 TBL, TBR, t10t, tau11, t12t,
42 T20, t21, T22;
43 FLA_Obj dT, d0,
44 dB, delta1,
45 d2;
46 FLA_Obj eT, e0,
47 eB, epsilon1,
48 e2;
49 FLA_Obj fT, f0,
50 fB, phi1,
51 f2;
52 FLA_Obj d, e, f;
53
61
64 a21_b;
65 FLA_Obj a2;
66
68 dim_t m_A;
70
71
73
75 m_A = FLA_Obj_length( A );
76
77 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
79 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
80 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
81 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
82 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
83 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
84 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
85 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
86 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
87
88 FLA_Set( FLA_ZERO, Y );
89 FLA_Set( FLA_ZERO, Z );
90
91 FLA_Part_2x2( A, &ATL, &ATR,
92 &ABL, &ABR, 0, 0, FLA_TL );
93 FLA_Part_2x2( Y, &YTL, &YTR,
94 &YBL, &YBR, 0, 0, FLA_TL );
95 FLA_Part_2x2( Z, &ZTL, &ZTR,
96 &ZBL, &ZBR, 0, 0, FLA_TL );
97 FLA_Part_2x2( T, &TTL, &TTR,
98 &TBL, &TBR, 0, 0, FLA_TL );
99 FLA_Part_2x1( d, &dT,
100 &dB, 0, FLA_TOP );
101 FLA_Part_2x1( e, &eT,
102 &eB, 0, FLA_TOP );
103 FLA_Part_2x1( f, &fT,
104 &fB, 0, FLA_TOP );
105
106 while ( FLA_Obj_length( ATL ) < b_alg )
107 {
108 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
109 /* ************* */ /* ************************** */
110 &a10t, /**/ &alpha11, &a12t,
111 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
112 1, 1, FLA_BR );
113 FLA_Repart_2x2_to_3x3( YTL, /**/ YTR, &Y00, /**/ &y01, &Y02,
114 /* ************* */ /* ************************ */
115 &y10t, /**/ &psi11, &y12t,
116 YBL, /**/ YBR, &Y20, /**/ &y21, &Y22,
117 1, 1, FLA_BR );
118 FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
119 /* ************* */ /* ************************* */
120 &z10t, /**/ &zeta11, &z12t,
121 ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
122 1, 1, FLA_BR );
123 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
124 /* ************* */ /* ************************** */
125 &t10t, /**/ &tau11, &t12t,
126 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
127 1, 1, FLA_BR );
129 /* ** */ /* ****** */
130 &delta1,
131 dB, &d2, 1, FLA_BOTTOM );
133 /* ** */ /* ******** */
134 &epsilon1,
135 eB, &e2, 1, FLA_BOTTOM );
137 /* ** */ /* **** */
138 &phi1,
139 fB, &f2, 1, FLA_BOTTOM );
140
141 /*------------------------------------------------------------*/
142
143 // Save last element of a10t (a10t_r) and set it to one so we can use a10t as
144 // u10t in subsequent computations. We will restore it later on.
145 if ( FLA_Obj_length( ATL ) > 0 )
146 {
150 }
151
153 a21, &a2 );
154
155 // alpha11 = alpha11 - u10t * y10t' - z10t * u10t';
156 // a21 = a21 - U20 * y10t' - Z20 * u10t';
159
160 // a12t = a12t - u10t * Y20' - z10t * U20';
163
164 // Restore last element of a10t.
165 if ( FLA_Obj_length( ATL ) > 0 )
166 {
168 }
169
170 if ( FLA_Obj_length( A22 ) > 0 )
171 {
173 &a21_b, 1, FLA_TOP );
174
175 // [ u21, tau11, a21 ] = House( a21 );
177 a21_t,
178 a21_b, tau11 );
179
180 // inv_tau11 = 1 / tau11;
181 // minus_inv_tau11 = -1 / tau11;
186
187 // Save first element of a21_t and set it to one.
190
191 // y21 = A22' * u21;
193
194 // z21 = A22 * u21;
196
197 // y21 = y21 - Y20 * ( U20' * u21 ) - U20 * ( Z20' * u21 );
201
204
205 // t01 = U20' * u21;
206 FLA_Copy( d0, t01 );
207
208 // z21 = z21 - U20 * ( Y20' * u21 ) - Z20 * ( U20' * u21 );
211
212 // beta = u21' * z21 / 2;
213 // conj_beta = conj(beta);
217
218 // y21' = ( y21' - beta / tau * u21' ) / tau;
219 // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
223
224 // z21 = ( z21 - beta / tau * u21 ) / tau;
226 FLA_Axpy( beta, a21, z21 );
228
229 // a12t = a12t * ( I - u21 * u21' / tau );
230 // = a12t - ( a12t * u21 ) * u21' / tau;
234
235 // A02 = A02 * ( I - u21 * u21' / tau );
236 // = A02 - ( A02 * u21 ) * u21' / tau;
239
240 // Restore first element of a21.
242 }
243
244 /*------------------------------------------------------------*/
245
246 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
247 a10t, alpha11, /**/ a12t,
248 /* ************** */ /* ************************ */
249 &ABL, /**/ &ABR, A20, a21, /**/ A22,
250 FLA_TL );
251 FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR, Y00, y01, /**/ Y02,
252 y10t, psi11, /**/ y12t,
253 /* ************** */ /* ********************** */
254 &YBL, /**/ &YBR, Y20, y21, /**/ Y22,
255 FLA_TL );
256 FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
257 z10t, zeta11, /**/ z12t,
258 /* ************** */ /* *********************** */
259 &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
260 FLA_TL );
261 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
262 t10t, tau11, /**/ t12t,
263 /* ************** */ /* ************************ */
264 &TBL, /**/ &TBR, T20, t21, /**/ T22,
265 FLA_TL );
267 delta1,
268 /* ** */ /* ****** */
269 &dB, d2, FLA_TOP );
271 epsilon1,
272 /* ** */ /* ******** */
273 &eB, e2, FLA_TOP );
275 phi1,
276 /* ** */ /* **** */
277 &fB, f2, FLA_TOP );
278 }
279
284 FLA_Obj_free( &beta );
287 FLA_Obj_free( &d );
288 FLA_Obj_free( &e );
289 FLA_Obj_free( &f );
290
291 return FLA_SUCCESS;
292}
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemvc.c:13

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_unb_var4().

◆ FLA_Hess_UT_step_unb_var5()

FLA_Error FLA_Hess_UT_step_unb_var5 ( FLA_Obj  A,
FLA_Obj  U,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Obj ATL, ATR, A00, a01, A02,
33 A20, a21, A22;
34 FLA_Obj UTL, UTR, U00, u01, U02,
36 U20, u21, U22;
37 FLA_Obj ZTL, ZTR, Z00, z01, Z02,
39 Z20, z21, Z22;
40 FLA_Obj TTL, TTR, T00, t01, T02,
41 TBL, TBR, t10t, tau11, t12t,
42 T20, t21, T22;
43 FLA_Obj wT, w0,
44 wB, omega1,
45 w2;
46 FLA_Obj w;
47
49 a21_b;
51 u21_b;
52
54 dim_t m_A;
56
57
59
61 m_A = FLA_Obj_length( A );
62
63 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
64
65 FLA_Set( FLA_ZERO, U );
66 FLA_Set( FLA_ZERO, Z );
67
68 FLA_Part_2x2( A, &ATL, &ATR,
69 &ABL, &ABR, 0, 0, FLA_TL );
70 FLA_Part_2x2( U, &UTL, &UTR,
71 &UBL, &UBR, 0, 0, FLA_TL );
72 FLA_Part_2x2( Z, &ZTL, &ZTR,
73 &ZBL, &ZBR, 0, 0, FLA_TL );
74 FLA_Part_2x2( T, &TTL, &TTR,
75 &TBL, &TBR, 0, 0, FLA_TL );
76 FLA_Part_2x1( w, &wT,
77 &wB, 0, FLA_TOP );
78
79 while ( FLA_Obj_length( ATL ) < b_alg )
80 {
81 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
82 /* ************* */ /* ************************** */
83 &a10t, /**/ &alpha11, &a12t,
84 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
85 1, 1, FLA_BR );
86 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &u01, &U02,
87 /* ************* */ /* **************************** */
88 &u10t, /**/ &upsilon11, &u12t,
89 UBL, /**/ UBR, &U20, /**/ &u21, &U22,
90 1, 1, FLA_BR );
91 FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
92 /* ************* */ /* ************************* */
93 &z10t, /**/ &zeta11, &z12t,
94 ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
95 1, 1, FLA_BR );
96 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
97 /* ************* */ /* ************************** */
98 &t10t, /**/ &tau11, &t12t,
99 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
100 1, 1, FLA_BR );
102 /* ** */ /* ****** */
103 &omega1,
104 wB, &w2, 1, FLA_BOTTOM );
105
106 /*------------------------------------------------------------*/
107
108 if ( FLA_Obj_length( ATL ) > 0 )
109 {
110 // w0 = inv( triu( T00 ) ) * u10t';
113 T00, w0 );
114
115 // a01 = a01 - Z00 * w0;
116 // alpha11 = alpha11 - z10t * w0;
117 // a21 = a21 - Z20 * w0;
121
122 // w0 = inv( triu( T00 ) )' * ( U00' * a01 + u10t' * alpha11 + U20' * a21 );
124 FLA_ONE, U00, a01, FLA_ZERO, w0 );
127
129 T00, w0 );
130
131 // a01 = a01 - U00 * w0;
132 // alpha11 = alpha11 - u10t * w0;
133 // a21 = a21 - U20 * w0;
138 }
139
140 if ( FLA_Obj_length( a21 ) > 0 )
141 {
143 &a21_b, 1, FLA_TOP );
144
145 // [ u21, tau11, a21 ] = House( a21 );
147 a21_t,
148 a21_b, tau11 );
149
150 // u21 := a21;
151 FLA_Copy( a21, u21 );
152
153 // Explicitly set the first element of the Householder vector so we
154 // can use it in regular computations.
156 &u21_b, 1, FLA_TOP );
158
159 // z01 = A02 * u21;
160 // zeta11 = a12t * u21;
161 // z21 = A22 * u21;
163 FLA_Dot( a12t, u21, zeta11 );
165
166 // t01 = U20' * u21;
168 }
169
170 /*------------------------------------------------------------*/
171
172 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
173 a10t, alpha11, /**/ a12t,
174 /* ************** */ /* ************************ */
175 &ABL, /**/ &ABR, A20, a21, /**/ A22,
176 FLA_TL );
177 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, u01, /**/ U02,
178 u10t, upsilon11, /**/ u12t,
179 /* ************** */ /* ************************** */
180 &UBL, /**/ &UBR, U20, u21, /**/ U22,
181 FLA_TL );
182 FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
183 z10t, zeta11, /**/ z12t,
184 /* ************** */ /* *********************** */
185 &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
186 FLA_TL );
187 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
188 t10t, tau11, /**/ t12t,
189 /* ************** */ /* ************************ */
190 &TBL, /**/ &TBR, T20, t21, /**/ T22,
191 FLA_TL );
193 omega1,
194 /* ** */ /* ****** */
195 &wB, w2, FLA_TOP );
196 }
197
198 FLA_Obj_free( &w );
199
200 return FLA_SUCCESS;
201}
FLA_Error FLA_Dots(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho)
Definition FLA_Dots.c:13
FLA_Error FLA_Trmvsx(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Trmvsx.c:13
FLA_Error FLA_Trsv(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x)
Definition FLA_Trsv.c:15

References FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dots(), FLA_Gemv(), FLA_Househ2_UT(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Trmvsx(), FLA_Trsv(), FLA_ZERO, i, and omega1.

Referenced by FLA_Hess_UT_unb_var5().

◆ FLA_Hess_UT_unb_var1()

FLA_Error FLA_Hess_UT_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_unb_var1( A, T );
16}
FLA_Error FLA_Hess_UT_step_unb_var1(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_unb_var1.c:18

References FLA_Hess_UT_step_unb_var1(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var2()

FLA_Error FLA_Hess_UT_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_unb_var2( A, T );
16}
FLA_Error FLA_Hess_UT_step_unb_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_unb_var2.c:18

References FLA_Hess_UT_step_unb_var2(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var3()

FLA_Error FLA_Hess_UT_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 return FLA_Hess_UT_step_unb_var3( A, T );
16}
FLA_Error FLA_Hess_UT_step_unb_var3(FLA_Obj A, FLA_Obj T)
Definition FLA_Hess_UT_unb_var3.c:18

References FLA_Hess_UT_step_unb_var3(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var4()

FLA_Error FLA_Hess_UT_unb_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Y, Z;
17
20
22
23 FLA_Obj_free( &Y );
24 FLA_Obj_free( &Z );
25
26 return r_val;
27}
FLA_Error FLA_Hess_UT_step_unb_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition FLA_Hess_UT_unb_var4.c:29

References FLA_Hess_UT_step_unb_var4(), FLA_Obj_create_conf_to(), FLA_Obj_free(), and i.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var5()

FLA_Error FLA_Hess_UT_unb_var5 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj U, Z;
17
20
22
23 FLA_Obj_free( &U );
24 FLA_Obj_free( &Z );
25
26 return r_val;
27}
FLA_Error FLA_Hess_UT_step_unb_var5(FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
Definition FLA_Hess_UT_unb_var5.c:29

References FLA_Hess_UT_step_unb_var5(), FLA_Obj_create_conf_to(), FLA_Obj_free(), and i.

Referenced by FLA_Hess_UT_internal().