libflame revision_anchor
Functions
FLA_Tridiag_UT_l.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Tridiag_UT_l_blk_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blk_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blf_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blk_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blf_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_unb_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_unb_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Fused_Her2_Ax_l_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj z, FLA_Obj A, FLA_Obj x, FLA_Obj w)
 
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1 (int m_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1 (int m_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1 (int m_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1 (int m_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 (FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Her2_Ax_l_opc_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opc_var1 ( int  m_A,
scomplex * buff_alpha,
scomplex * buff_u,
int  inc_u,
scomplex * buff_z,
int  inc_z,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_x,
int  inc_x,
scomplex * buff_w,
int  inc_w 
)
336{
338 int i;
339
340 bl1_csetv( m_A,
341 buff_0,
342 buff_w, inc_w );
343
344 for ( i = 0; i < m_A; ++i )
345 {
346 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
347 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
348
350 scomplex* u2 = buff_u + (i+1)*inc_u;
351
352 scomplex* zeta1 = buff_z + (i )*inc_z;
353 scomplex* z2 = buff_z + (i+1)*inc_z;
354
355 scomplex* chi1 = buff_x + (i )*inc_x;
356 scomplex* x2 = buff_x + (i+1)*inc_x;
357
358 scomplex* omega1 = buff_w + (i )*inc_w;
359 scomplex* w2 = buff_w + (i+1)*inc_w;
360
361 // scomplex* beta = buff_beta;
362
366
367 int m_ahead = m_A - i - 1;
368
369 /*------------------------------------------------------------*/
370
371 // bl1_ccopyconj( zeta1, &conj_zeta1 );
372 // bl1_cmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
373 // bl1_cmult3( &minus_conj_zeta1, upsilon1, &temp );
374 // bl1_cadd3( &temp, alpha11, alpha11 );
375
376 //bl1_ccopyconj( upsilon1, &conj_upsilon1 );
377 //bl1_cmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
378 //bl1_cmult3( &minus_conj_upsilon1, zeta1, &temp );
379 //bl1_cadd3( &temp, alpha11, alpha11 );
380 minus_conj_zeta1.real = - zeta1->real;
381 minus_conj_zeta1.imag = - -zeta1->imag;
382 minus_conj_upsilon1.real = - upsilon1->real;
383 minus_conj_upsilon1.imag = - -upsilon1->imag;
384
385 alpha11->real -= zeta1->real * upsilon1->real - -zeta1->imag * upsilon1->imag +
386 zeta1->real * upsilon1->real - zeta1->imag * -upsilon1->imag;
387 alpha11->imag -= -zeta1->imag * upsilon1->real + zeta1->real * upsilon1->imag +
388 zeta1->imag * upsilon1->real + zeta1->real * -upsilon1->imag;
389
391 m_ahead,
393 u2, inc_u,
394 a21, rs_A );
395/*
396 F77_caxpy( &m_ahead,
397 &minus_conj_zeta1,
398 u2, &inc_u,
399 a21, &rs_A );
400*/
401
402
404 m_ahead,
406 z2, inc_z,
407 a21, rs_A );
408/*
409 F77_caxpy( &m_ahead,
410 &minus_conj_upsilon1,
411 z2, &inc_z,
412 a21, &rs_A );
413*/
414
415 // bl1_cmult3( alpha11, chi1, &temp );
416 // bl1_cadd3( &temp, omega1, omega1 );
417 omega1->real += alpha11->real * chi1->real - alpha11->imag * chi1->imag;
418 omega1->imag += alpha11->imag * chi1->real + alpha11->real * chi1->imag;
419
421 m_ahead,
422 a21, rs_A,
423 x2, inc_x,
424 &temp );
425 // bl1_cadd3( &temp, omega1, omega1 );
426 omega1->real += temp.real;
427 omega1->imag += temp.imag;
428
430 m_ahead,
431 chi1,
432 a21, rs_A,
433 w2, inc_w );
434/*
435 F77_caxpy( &m_ahead,
436 chi1,
437 a21, &rs_A,
438 w2, &inc_w );
439*/
440
441 /*------------------------------------------------------------*/
442
443 }
444
445 return FLA_SUCCESS;
446}
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
double *restrict zeta1
Definition bl1_axmyv2.c:142
int i
Definition bl1_axmyv2.c:145
chi1
Definition bl1_axmyv2.c:366
dcomplex temp
Definition bl1_axpyv2b.c:301
upsilon1
Definition bl1_axpyv2bdotaxpy.c:225
double *restrict omega1
Definition bl1_axpyv2bdotaxpy.c:200
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
double real
Definition blis_type_defs.h:139
double imag
Definition blis_type_defs.h:139
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, dcomplex::imag, omega1, dcomplex::real, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofc_var2().

◆ FLA_Fused_Her2_Ax_l_opd_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opd_var1 ( int  m_A,
double * buff_alpha,
double * buff_u,
int  inc_u,
double * buff_z,
int  inc_z,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_x,
int  inc_x,
double * buff_w,
int  inc_w 
)
253{
254 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
255 int i;
256
257 bl1_dsetv( m_A,
258 buff_0,
259 buff_w, inc_w );
260
261 for ( i = 0; i < m_A; ++i )
262 {
263 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
264 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
265
266 double* upsilon1 = buff_u + (i )*inc_u;
267 double* u2 = buff_u + (i+1)*inc_u;
268
269 double* zeta1 = buff_z + (i )*inc_z;
270 double* z2 = buff_z + (i+1)*inc_z;
271
272 double* chi1 = buff_x + (i )*inc_x;
273 double* x2 = buff_x + (i+1)*inc_x;
274
275 double* omega1 = buff_w + (i )*inc_w;
276 double* w2 = buff_w + (i+1)*inc_w;
277
278 // double* beta = buff_beta;
279
280 double minus_conj_upsilon1;
281 double minus_conj_zeta1;
282 double temp;
283
284 int m_ahead = m_A - i - 1;
285
286 /*------------------------------------------------------------*/
287
288 // bl1_dcopyconj( zeta1, &conj_zeta1 );
289 // bl1_dmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
290 // bl1_dmult3( &minus_conj_zeta1, upsilon1, &temp );
291 // bl1_dadd3( &temp, alpha11, alpha11 );
292
293 //bl1_dcopyconj( upsilon1, &conj_upsilon1 );
294 //bl1_dmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
295 //bl1_dmult3( &minus_conj_upsilon1, zeta1, &temp );
296 //bl1_dadd3( &temp, alpha11, alpha11 );
299
300 *alpha11 -= 2.0 * *zeta1 * *upsilon1;
301
302 // bl1_dmult3( alpha11, chi1, &temp );
303 // bl1_dadd3( &temp, omega1, omega1 );
304 *omega1 += *alpha11 * *chi1;
305
308 u2, inc_u,
310 z2, inc_z,
311 a21, rs_A,
312 x2, inc_x,
313 chi1,
314 &temp,
315 w2, inc_w );
316
317 // bl1_dadd3( &temp, omega1, omega1 );
318 *omega1 += temp;
319
320 /*------------------------------------------------------------*/
321
322 }
323
324 return FLA_SUCCESS;
325}
void bl1_daxpyv2bdotaxpy(int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition bl1_axpyv2bdotaxpy.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition bl1_setv.c:39

References bl1_daxpyv2bdotaxpy(), bl1_dsetv(), chi1, FLA_ZERO, i, omega1, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofd_var2().

◆ FLA_Fused_Her2_Ax_l_ops_var1()

FLA_Error FLA_Fused_Her2_Ax_l_ops_var1 ( int  m_A,
float * buff_alpha,
float * buff_u,
int  inc_u,
float * buff_z,
int  inc_z,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_x,
int  inc_x,
float * buff_w,
int  inc_w 
)
133{
134 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
135 int i;
136
137 bl1_ssetv( m_A,
138 buff_0,
139 buff_w, inc_w );
140
141 for ( i = 0; i < m_A; ++i )
142 {
143 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
144 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
145
146 float* upsilon1 = buff_u + (i )*inc_u;
147 float* u2 = buff_u + (i+1)*inc_u;
148
149 float* zeta1 = buff_z + (i )*inc_z;
150 float* z2 = buff_z + (i+1)*inc_z;
151
152 float* chi1 = buff_x + (i )*inc_x;
153 float* x2 = buff_x + (i+1)*inc_x;
154
155 float* omega1 = buff_w + (i )*inc_w;
156 float* w2 = buff_w + (i+1)*inc_w;
157
158 // float* beta = buff_beta;
159
161 float minus_conj_zeta1;
162 float temp;
163
164 int m_ahead = m_A - i - 1;
165
166 /*------------------------------------------------------------*/
167
168 // bl1_scopyconj( zeta1, &conj_zeta1 );
169 // bl1_smult3( beta, &conj_zeta1, &minus_conj_zeta1 );
170 // bl1_smult3( &minus_conj_zeta1, upsilon1, &temp );
171 // bl1_sadd3( &temp, alpha11, alpha11 );
172
173 //bl1_scopyconj( upsilon1, &conj_upsilon1 );
174 //bl1_smult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
175 //bl1_smult3( &minus_conj_upsilon1, zeta1, &temp );
176 //bl1_sadd3( &temp, alpha11, alpha11 );
179
180 *alpha11 -= 2.0F * *zeta1 * *upsilon1;
181
183 m_ahead,
185 u2, inc_u,
186 a21, rs_A );
187/*
188 F77_saxpy( &m_ahead,
189 &minus_conj_zeta1,
190 u2, &inc_u,
191 a21, &rs_A );
192*/
193
194
196 m_ahead,
198 z2, inc_z,
199 a21, rs_A );
200/*
201 F77_saxpy( &m_ahead,
202 &minus_conj_upsilon1,
203 z2, &inc_z,
204 a21, &rs_A );
205*/
206
207 // bl1_smult3( alpha11, chi1, &temp );
208 // bl1_sadd3( &temp, omega1, omega1 );
209 *omega1 += *alpha11 * *chi1;
210
212 m_ahead,
213 a21, rs_A,
214 x2, inc_x,
215 &temp );
216/*
217 temp = F77_sdot( &m_ahead,
218 a21, &rs_A,
219 x2, &inc_x );
220*/
221
222 // bl1_sadd3( &temp, omega1, omega1 );
223 *omega1 += temp;
224
226 m_ahead,
227 chi1,
228 a21, rs_A,
229 w2, inc_w );
230/*
231 F77_saxpy( &m_ahead,
232 chi1,
233 a21, &rs_A,
234 w2, &inc_w );
235*/
236
237 /*------------------------------------------------------------*/
238
239 }
240
241 return FLA_SUCCESS;
242}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition bl1_setv.c:26

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, omega1, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofs_var2().

◆ FLA_Fused_Her2_Ax_l_opt_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  z,
FLA_Obj  A,
FLA_Obj  x,
FLA_Obj  w 
)
14{
15/*
16 Effective computation:
17 A = A + beta * ( u * z' + z * u' );
18 w = A * x;
19*/
20 FLA_Datatype datatype;
21 int m_A;
22 int rs_A, cs_A;
23 int inc_u, inc_z, inc_x, inc_w;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28
31
36
37
38 switch ( datatype )
39 {
40 case FLA_FLOAT:
41 {
42 float* buff_A = FLA_FLOAT_PTR( A );
43 float* buff_u = FLA_FLOAT_PTR( u );
44 float* buff_z = FLA_FLOAT_PTR( z );
45 float* buff_x = FLA_FLOAT_PTR( x );
46 float* buff_w = FLA_FLOAT_PTR( w );
47 float* buff_beta = FLA_FLOAT_PTR( beta );
48
55 buff_w, inc_w );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_u = FLA_DOUBLE_PTR( u );
64 double* buff_z = FLA_DOUBLE_PTR( z );
65 double* buff_x = FLA_DOUBLE_PTR( x );
66 double* buff_w = FLA_DOUBLE_PTR( w );
67 double* buff_beta = FLA_DOUBLE_PTR( beta );
68
75 buff_w, inc_w );
76
77 break;
78 }
79
80 case FLA_COMPLEX:
81 {
88
95 buff_w, inc_w );
96
97 break;
98 }
99
101 {
108
110 buff_beta,
111 buff_u, inc_u,
112 buff_z, inc_z,
113 buff_A, rs_A, cs_A,
114 buff_x, inc_x,
115 buff_w, inc_w );
116
117 break;
118 }
119 }
120
121 return FLA_SUCCESS;
122}
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1(int m_A, scomplex *buff_beta, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:329
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1(int m_A, double *buff_beta, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:246
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1(int m_A, dcomplex *buff_beta, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:450
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1(int m_A, float *buff_beta, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
Definition FLA_Fused_Her2_Ax_l_opt_var1.c:126
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and i.

◆ FLA_Fused_Her2_Ax_l_opz_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opz_var1 ( int  m_A,
dcomplex * buff_alpha,
dcomplex * buff_u,
int  inc_u,
dcomplex * buff_z,
int  inc_z,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_x,
int  inc_x,
dcomplex * buff_w,
int  inc_w 
)
457{
458 dcomplex zero = bl1_z0();
459 int i;
460
461 bl1_zsetv( m_A,
462 &zero,
463 buff_w, inc_w );
464
465 for ( i = 0; i < m_A; ++i )
466 {
467 dcomplex* restrict alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
468 dcomplex* restrict a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
469
471 dcomplex* restrict u2 = buff_u + (i+1)*inc_u;
472
474 dcomplex* restrict z2 = buff_z + (i+1)*inc_z;
475
477 dcomplex* restrict x2 = buff_x + (i+1)*inc_x;
478
480 dcomplex* restrict w2 = buff_w + (i+1)*inc_w;
481
482 //dcomplex* restrict beta = buff_beta;
483
487
493
494 int m_ahead = m_A - i - 1;
495
496 /*------------------------------------------------------------*/
497
498 // bl1_zcopyconj( zeta1, &conj_zeta1 );
499 // bl1_zmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
500 // bl1_zmult3( &minus_conj_zeta1, upsilon1, &temp );
501 // bl1_zadd3( &temp, alpha11, alpha11 );
502
503 //bl1_zcopyconj( upsilon1, &conj_upsilon1 );
504 //bl1_zmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
505 //bl1_zmult3( &minus_conj_upsilon1, zeta1, &temp );
506 //bl1_zadd3( &temp, alpha11, alpha11 );
507 minus_conj_zeta1.real = - zeta1->real;
508 minus_conj_zeta1.imag = - -zeta1->imag;
509 minus_conj_upsilon1.real = - upsilon1->real;
510 minus_conj_upsilon1.imag = - -upsilon1->imag;
511
512 ze1 = *zeta1;
513 up1 = *upsilon1;
514 a11 = *alpha11;
515 om1 = *omega1;
516 ch1 = *chi1;
517
518 //alpha11->real -= zeta1->real * upsilon1->real - -zeta1->imag * upsilon1->imag +
519 // zeta1->real * upsilon1->real - zeta1->imag * -upsilon1->imag;
520 //alpha11->imag -= -zeta1->imag * upsilon1->real + zeta1->real * upsilon1->imag +
521 // zeta1->imag * upsilon1->real + zeta1->real * -upsilon1->imag;
522 a11.real -= ze1.real * up1.real - -ze1.imag * up1.imag +
523 up1.real * ze1.real - -up1.imag * ze1.imag;
524 a11.imag -= ze1.real * up1.imag + -ze1.imag * up1.real +
525 up1.real * ze1.imag + -up1.imag * ze1.real;
526
527 // bl1_zmult3( alpha11, chi1, &temp );
528 // bl1_zadd3( &temp, omega1, omega1 );
529 //omega1->real += alpha11->real * chi1->real - alpha11->imag * chi1->imag;
530 //omega1->imag += alpha11->imag * chi1->real + alpha11->real * chi1->imag;
531 om1.real += a11.real * ch1.real - a11.imag * ch1.imag;
532 om1.imag += a11.imag * ch1.real + a11.real * ch1.imag;
533
534 *alpha11 = a11;
535 *omega1 = om1;
536
537/*
538 bl1_zaxpyv2bdotaxpy( m_ahead,
539 &minus_conj_zeta1,
540 u2, inc_u,
541 &minus_conj_upsilon1,
542 z2, inc_z,
543 a21, rs_A,
544 x2, inc_x,
545 chi1,
546 &temp,
547 w2, inc_w );
548*/
549
553 u2, inc_u,
554 z2, inc_z,
555 a21, rs_A );
556
558 a21, rs_A,
559 x2, inc_x,
560 chi1,
561 &temp,
562 w2, inc_w );
563
564
565 // bl1_zadd3( &temp, omega1, omega1 );
566 omega1->real += temp.real;
567 omega1->imag += temp.imag;
568
569 /*------------------------------------------------------------*/
570
571 }
572
573 return FLA_SUCCESS;
574}
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition bl1_axpyv2b.c:210
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition bl1_dotaxpy.c:258
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition bl1_setv.c:66

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotaxpy(), bl1_zsetv(), chi1, i, dcomplex::imag, omega1, dcomplex::real, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofz_var2().

◆ FLA_Fused_UZhu_ZUhu_opc_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 ( int  m_U,
int  n_U,
scomplex * buff_delta,
scomplex * buff_U,
int  rs_U,
int  cs_U,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_t,
int  inc_t,
scomplex * buff_u,
int  inc_u,
scomplex * buff_w,
int  inc_w 
)
419{
420 int i;
421
422 for ( i = 0; i < n_U; ++i )
423 {
424 scomplex* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
425 scomplex* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
427 scomplex* tau1 = buff_t + (i )*inc_t;
428 scomplex* u = buff_u;
429 scomplex* w = buff_w;
432
433 /*------------------------------------------------------------*/
434
436 m_U,
437 z1, rs_Z,
438 u, inc_u,
439 &alpha );
440
442 m_U,
443 u1, rs_U,
444 u, inc_u,
445 &beta );
446
447 *tau1 = beta;
448
450 bl1_cscals( delta, &beta );
451
453 m_U,
454 &alpha,
455 u1, rs_U,
456 w, inc_w );
457/*
458 F77_caxpy( &m_U,
459 &alpha,
460 u1, &rs_U,
461 w, &inc_w );
462*/
463
465 m_U,
466 &beta,
467 z1, rs_U,
468 w, inc_w );
469/*
470 F77_caxpy( &m_U,
471 &beta,
472 z1, &rs_Z,
473 w, &inc_w );
474*/
475
476 /*------------------------------------------------------------*/
477
478 }
479
480 return FLA_SUCCESS;
481}
double *restrict z1
Definition bl1_dotsv2.c:148

References bl1_caxpyv(), bl1_cdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofc_var3().

◆ FLA_Fused_UZhu_ZUhu_opd_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 ( int  m_U,
int  n_U,
double * buff_delta,
double * buff_U,
int  rs_U,
int  cs_U,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_t,
int  inc_t,
double * buff_u,
int  inc_u,
double * buff_w,
int  inc_w 
)
230{
231 double zero = bl1_d0();
232
233 int n_run = n_U / 2;
234 int n_left = n_U % 2;
235 int step_u = 2*cs_U;
236 int step_z = 2*cs_Z;
237 int step_tau = 2*inc_t;
238 int i;
239
240 double* u = buff_u;
241 double* w = buff_w;
242 //double* delta = buff_delta;
243
244 double* u1;
245 double* u2;
246 double* u3;
247 double* z1;
248 double* z2;
249 double* z3;
250 double* tau1;
251 double* tau2;
252 double* tau3;
253
254 u1 = buff_U;
255 u2 = buff_U + cs_U;
256 u3 = buff_U + 2*cs_U;
257 z1 = buff_Z;
258 z2 = buff_Z + cs_Z;
259 z3 = buff_Z + 2*cs_Z;
260 tau1 = buff_t;
261 tau2 = buff_t + inc_t;
262 tau3 = buff_t + 2*inc_t;
263
264 for ( i = 0; i < n_run; ++i )
265 {
266 double rho_z1u;
267 double rho_z2u;
268 //double rho_z3u;
269 double rho_u1u;
270 double rho_u2u;
271 //double rho_u3u;
272
273 /*------------------------------------------------------------*/
274/*
275 bl1_ddotsv3( BLIS1_CONJUGATE,
276 m_U,
277 z1, rs_Z,
278 z2, rs_Z,
279 z3, rs_Z,
280 u, inc_u,
281 &zero,
282 &rho_z1u,
283 &rho_z2u,
284 &rho_z3u );
285 bl1_dneg1( &rho_z1u );
286 bl1_dneg1( &rho_z2u );
287 bl1_dneg1( &rho_z3u );
288
289 bl1_ddotv2axpyv2b( m_U,
290 u1, rs_U,
291 u2, rs_U,
292 u, inc_u,
293 &rho_z1u,
294 &rho_z2u,
295 &rho_u1u,
296 &rho_u2u,
297 w, inc_w );
298 bl1_ddotaxpy( m_U,
299 u3, rs_U,
300 u, inc_u,
301 &rho_z3u,
302 &rho_u3u,
303 w, inc_w );
304
305 *tau1 = rho_u1u;
306 *tau2 = rho_u2u;
307 *tau3 = rho_u3u;
308
309 bl1_dneg1( &rho_u1u );
310 bl1_dneg1( &rho_u2u );
311 bl1_dneg1( &rho_u3u );
312
313 bl1_daxpyv3b( m_U,
314 &rho_u1u,
315 &rho_u2u,
316 &rho_u3u,
317 z1, rs_Z,
318 z2, rs_Z,
319 z3, rs_Z,
320 w, inc_w );
321*/
323 m_U,
324 z1, rs_Z,
325 z2, rs_Z,
326 u, inc_u,
327 &zero,
328 &rho_z1u,
329 &rho_z2u );
330 bl1_dneg1( &rho_z1u );
331 bl1_dneg1( &rho_z2u );
332
334 u1, rs_U,
335 u2, rs_U,
336 u, inc_u,
337 &rho_z1u,
338 &rho_z2u,
339 &rho_u1u,
340 &rho_u2u,
341 w, inc_w );
342
343 *tau1 = rho_u1u;
344 *tau2 = rho_u2u;
345
346 bl1_dneg1( &rho_u1u );
347 bl1_dneg1( &rho_u2u );
348
350 &rho_u1u,
351 &rho_u2u,
352 z1, rs_Z,
353 z2, rs_Z,
354 w, inc_w );
355
356
357 /*------------------------------------------------------------*/
358
359 u1 += step_u;
360 u2 += step_u;
361 u3 += step_u;
362 z1 += step_z;
363 z2 += step_z;
364 z3 += step_z;
365 tau1 += step_tau;
366 tau2 += step_tau;
367 tau3 += step_tau;
368 }
369
370 if ( n_left > 0 )
371 {
372 for ( i = 0; i < n_left; ++i )
373 {
374 double rho_z1u;
375 double rho_u1u;
376
378 m_U,
379 z1, rs_Z,
380 u, inc_u,
381 &rho_z1u );
382 bl1_dneg1( &rho_z1u );
383
385 u1, rs_U,
386 u, inc_u,
387 &rho_z1u,
388 &rho_u1u,
389 w, inc_w );
390
391 *tau1 = rho_u1u;
392
393 bl1_dneg1( &rho_u1u );
395 m_U,
396 &rho_u1u,
397 z1, rs_Z,
398 w, inc_w );
399
400 u1 += cs_U;
401 z1 += cs_Z;
402 tau1 += inc_t;
403 }
404 }
405
406 return FLA_SUCCESS;
407}
int n_left
Definition bl1_axmyv2.c:149
int n_run
Definition bl1_axmyv2.c:148
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition bl1_axpyv2b.c:31
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_ddotaxpy(int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition bl1_dotaxpy.c:31
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition bl1_dotsv2.c:35
void bl1_ddotv2axpyv2b(int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
Definition bl1_dotv2axpyv2b.c:36
double bl1_d0(void)
Definition bl1_constants.c:118

References bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotaxpy(), bl1_ddotsv2(), bl1_ddotv2axpyv2b(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofd_var3().

◆ FLA_Fused_UZhu_ZUhu_ops_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 ( int  m_U,
int  n_U,
float * buff_delta,
float * buff_U,
int  rs_U,
int  cs_U,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_t,
int  inc_t,
float * buff_u,
int  inc_u,
float * buff_w,
int  inc_w 
)
144{
145 int i;
146
147 for ( i = 0; i < n_U; ++i )
148 {
149 float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
150 float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
151 float* delta = buff_delta;
152 float* tau1 = buff_t + (i )*inc_t;
153 float* u = buff_u;
154 float* w = buff_w;
155 float alpha;
156 float beta;
157
158 /*------------------------------------------------------------*/
159
160 bl1_sdot( BLIS1_CONJUGATE,
161 m_U,
162 z1, rs_Z,
163 u, inc_u,
164 &alpha );
165/*
166 alpha = F77_sdot( &m_U,
167 z1, &rs_Z,
168 u, &inc_u );
169*/
170
171 bl1_sdot( BLIS1_CONJUGATE,
172 m_U,
173 u1, rs_U,
174 u, inc_u,
175 &beta );
176/*
177 beta = F77_sdot( &m_U,
178 u1, &rs_U,
179 u, &inc_u );
180*/
181
182 *tau1 = beta;
183
184 // bl1_sscals( delta, &alpha );
185 // bl1_sscals( delta, &beta );
186 alpha *= *delta;
187 beta *= *delta;
188
189 bl1_saxpyv( BLIS1_NO_CONJUGATE,
190 m_U,
191 &alpha,
192 u1, rs_U,
193 w, inc_w );
194/*
195 F77_saxpy( &m_U,
196 &alpha,
197 u1, &rs_U,
198 w, &inc_w );
199*/
200
201 bl1_saxpyv( BLIS1_NO_CONJUGATE,
202 m_U,
203 &beta,
204 z1, rs_Z,
205 w, inc_w );
206/*
207 F77_saxpy( &m_U,
208 &beta,
209 z1, &rs_Z,
210 w, &inc_w );
211*/
212
213 /*------------------------------------------------------------*/
214
215 }
216
217 return FLA_SUCCESS;
218}

References bl1_saxpyv(), bl1_sdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofs_var3().

◆ FLA_Fused_UZhu_ZUhu_opt_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 ( FLA_Obj  delta,
FLA_Obj  U,
FLA_Obj  Z,
FLA_Obj  t,
FLA_Obj  u,
FLA_Obj  w 
)
14{
15/*
16 Effective computation:
17 w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
18 t = U' u;
19*/
20 FLA_Datatype datatype;
21 int m_U, n_U;
22 int rs_U, cs_U;
23 int rs_Z, cs_Z;
24 int inc_u, inc_w, inc_t;
25
26 datatype = FLA_Obj_datatype( U );
27
28 m_U = FLA_Obj_length( U );
29 n_U = FLA_Obj_width( U );
30
33
36
38
40
42
43
44 switch ( datatype )
45 {
46 case FLA_FLOAT:
47 {
48 float* buff_U = FLA_FLOAT_PTR( U );
49 float* buff_Z = FLA_FLOAT_PTR( Z );
50 float* buff_t = FLA_FLOAT_PTR( t );
51 float* buff_u = FLA_FLOAT_PTR( u );
52 float* buff_w = FLA_FLOAT_PTR( w );
53 float* buff_delta = FLA_FLOAT_PTR( delta );
54
56 n_U,
62 buff_w, inc_w );
63
64 break;
65 }
66
67 case FLA_DOUBLE:
68 {
69 double* buff_U = FLA_DOUBLE_PTR( U );
70 double* buff_Z = FLA_DOUBLE_PTR( Z );
71 double* buff_t = FLA_DOUBLE_PTR( t );
72 double* buff_u = FLA_DOUBLE_PTR( u );
73 double* buff_w = FLA_DOUBLE_PTR( w );
74 double* buff_delta = FLA_DOUBLE_PTR( delta );
75
77 n_U,
83 buff_w, inc_w );
84
85 break;
86 }
87
88 case FLA_COMPLEX:
89 {
96
98 n_U,
100 buff_U, rs_U, cs_U,
101 buff_Z, rs_Z, cs_Z,
102 buff_t, inc_t,
103 buff_u, inc_u,
104 buff_w, inc_w );
105
106 break;
107 }
108
109 case FLA_DOUBLE_COMPLEX:
110 {
117
119 n_U,
121 buff_U, rs_U, cs_U,
122 buff_Z, rs_Z, cs_Z,
123 buff_t, inc_t,
124 buff_u, inc_u,
125 buff_w, inc_w );
126
127 break;
128 }
129 }
130
131 return FLA_SUCCESS;
132}
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:222
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:411
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:136
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:485
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123

References FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_UZhu_ZUhu_opz_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 ( int  m_U,
int  n_U,
dcomplex * buff_delta,
dcomplex * buff_U,
int  rs_U,
int  cs_U,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_t,
int  inc_t,
dcomplex * buff_u,
int  inc_u,
dcomplex * buff_w,
int  inc_w 
)
493{
494 //dcomplex zero = bl1_z0();
495
496 int n_run = n_U / 1;
497 int n_left = n_U % 1;
498 int step_u = 1*cs_U;
499 int step_z = 1*cs_Z;
500 int step_tau = 1*inc_t;
501 int i;
502
503 dcomplex* u = buff_u;
504 dcomplex* w = buff_w;
505 //dcomplex* delta = buff_delta;
506
507 dcomplex* u1;
508 dcomplex* u2;
509 dcomplex* z1;
510 dcomplex* z2;
511 dcomplex* tau1;
512 dcomplex* tau2;
513
514 u1 = buff_U;
515 u2 = buff_U + cs_U;
516 z1 = buff_Z;
517 z2 = buff_Z + cs_Z;
518 tau1 = buff_t;
519 tau2 = buff_t + inc_t;
520
521 for ( i = 0; i < n_run; ++i )
522 {
523 dcomplex rho_z1u;
524 //dcomplex rho_z2u;
525 dcomplex rho_u1u;
526 //dcomplex rho_u2u;
527
528 /*------------------------------------------------------------*/
529
530/*
531 Effective computation:
532 w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
533*/
534
535/*
536 bl1_zdotsv2( BLIS1_CONJUGATE,
537 m_U,
538 z1, rs_Z,
539 u1, rs_U,
540 u, inc_u,
541 &zero,
542 &rho_z1u,
543 &rho_u1u );
544
545 *tau1 = rho_u1u;
546
547 //bl1_zscals( delta, &rho_z1u );
548 //bl1_zscals( delta, &rho_u1u );
549 bl1_zneg1( &rho_z1u );
550 bl1_zneg1( &rho_u1u );
551
552 bl1_zaxpyv2b( m_U,
553 &rho_z1u,
554 &rho_u1u,
555 u1, rs_U,
556 z1, rs_Z,
557 w, inc_w );
558*/
559/*
560 bl1_zdotsv2( BLIS1_CONJUGATE,
561 m_U,
562 z1, rs_Z,
563 z2, rs_Z,
564 u, inc_u,
565 &zero,
566 &rho_z1u,
567 &rho_z2u );
568 bl1_zneg1( &rho_z1u );
569 bl1_zneg1( &rho_z2u );
570
571 bl1_zdotv2axpyv2b( m_U,
572 u1, rs_U,
573 u2, rs_U,
574 u, inc_u,
575 &rho_z1u,
576 &rho_z2u,
577 &rho_u1u,
578 &rho_u2u,
579 w, inc_w );
580
581 *tau1 = rho_u1u;
582 *tau2 = rho_u2u;
583
584 bl1_zneg1( &rho_u1u );
585 bl1_zneg1( &rho_u2u );
586
587 bl1_zaxpyv2b( m_U,
588 &rho_u1u,
589 &rho_u2u,
590 z1, rs_Z,
591 z2, rs_Z,
592 w, inc_w );
593*/
594 bl1_zdot( BLIS1_CONJUGATE,
595 m_U,
596 z1, rs_Z,
597 u, inc_u,
598 &rho_z1u );
599 bl1_zneg1( &rho_z1u );
600
601 bl1_zdotaxpy( m_U,
602 u1, rs_U,
603 u, inc_u,
604 &rho_z1u,
605 &rho_u1u,
606 w, inc_w );
607
608 *tau1 = rho_u1u;
609
610 bl1_zneg1( &rho_u1u );
611
612 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
613 m_U,
614 &rho_u1u,
615 z1, rs_Z,
616 w, inc_w );
617
618 /*------------------------------------------------------------*/
619
620 u1 += step_u;
621 u2 += step_u;
622 z1 += step_z;
623 z2 += step_z;
624 tau1 += step_tau;
625 tau2 += step_tau;
626 }
627
628 if ( n_left == 1 )
629 {
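630 dcomplex rho_z1u;
631 dcomplex rho_u1u;
632
633 bl1_zdot( BLIS1_CONJUGATE,
634 m_U,
635 z1, rs_Z,
636 u, inc_u,
637 &rho_z1u );
638 bl1_zneg1( &rho_z1u );
639
640 bl1_zdotaxpy( m_U,
641 u1, rs_U,
642 u, inc_u,
643 &rho_z1u,
644 &rho_u1u,
645 w, inc_w );
646
647 *tau1 = rho_u1u;
648
649 bl1_zneg1( &rho_u1u );
650 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
651 m_U,
652 &rho_u1u,
653 z1, rs_Z,
654 w, inc_w );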
655 }
656
657 return FLA_SUCCESS;
658}
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65

References bl1_zaxpyv(), bl1_zdot(), bl1_zdotaxpy(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofz_var3().

◆ FLA_Tridiag_UT_l_blf_var2()

FLA_Error FLA_Tridiag_UT_l_blf_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TL, TR, T0, T1, T2;
19
22 dim_t b_alg, b;
23
25
26 FLA_Part_2x2( A, &ATL, &ATR,
27 &ABL, &ABR, 0, 0, FLA_TL );
28 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
29
30 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
31 {
32 b = min( FLA_Obj_length( ABR ), b_alg );
33
34 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
35 /* ************* */ /* ******************** */
36 &A10, /**/ &A11, &A12,
37 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
38 b, b, FLA_BR );
39 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
40 b, FLA_RIGHT );
41
42 /*------------------------------------------------------------*/
43
45 &none2, &none3, b, b, FLA_TL );
46
47 // [ ABR, T1 ] = FLA_Tridiag_UT_l_step_unb_var2( ABR, T1, b );
48 //FLA_Tridiag_UT_l_step_unb_var2( ABR, T1_tl );
50 //FLA_Tridiag_UT_l_step_opt_var2( ABR, T1_tl );
51
52 /*------------------------------------------------------------*/
53
54 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
55 A10, A11, /**/ A12,
56 /* ************** */ /* ****************** */
57 &ABL, /**/ &ABR, A20, A21, /**/ A22,
58 FLA_TL );
59 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
60 FLA_LEFT );
61 }
62
63 return FLA_SUCCESS;
64}
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_fus_var2.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
unsigned long dim_t
Definition FLA_type_defs.h:71
Definition FLA_type_defs.h:159

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Tridiag_UT_l_step_ofu_var2(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blf_var3()

FLA_Error FLA_Tridiag_UT_l_blf_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, T2;
25
26 FLA_Obj U, Z;
33 UB_bl;
35 dim_t m_A;
36 dim_t b_alg, b, bb;
37
39
41 m_A = FLA_Obj_length( A );
42
43 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45
46 FLA_Part_2x2( A, &ATL, &ATR,
47 &ABL, &ABR, 0, 0, FLA_TL );
48 FLA_Part_2x1( U, &UT,
49 &UB, 0, FLA_TOP );
50 FLA_Part_2x1( Z, &ZT,
51 &ZB, 0, FLA_TOP );
52 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53
54 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55 {
56 b = min( FLA_Obj_length( ABR ), b_alg );
57
58 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59 /* ************* */ /* ******************** */
60 &A10, /**/ &A11, &A12,
61 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62 b, b, FLA_BR );
64 /* ** */ /* ** */
65 &U1,
66 UB, &U2, b, FLA_BOTTOM );
68 /* ** */ /* ** */
69 &Z1,
70 ZB, &Z2, b, FLA_BOTTOM );
71 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72 b, FLA_RIGHT );
73
74 /*------------------------------------------------------------*/
75
77 &none2, &none3, b, b, FLA_TL );
78
79 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80
84
86 &U2_l, b, FLA_TOP );
88 &Z2_l, b, FLA_TOP );
89
90 // [ ABR, ZB, T1 ] = FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1, b );
91 //FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1_tl );
93 //FLA_Tridiag_UT_l_step_opt_var3( ABR, ZB, T1_tl );
94
95 if ( FLA_Obj_length( A22 ) > 0 )
96 {
97 // Build UB from ABR, with explicit unit subdiagonal and zeros.
100 &UB_bl, 1, FLA_TOP );
103
104 // A22 = A22 - U2 * Z2' - Z2 * U2';
107 }
108
109 /*------------------------------------------------------------*/
110
111 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112 A10, A11, /**/ A12,
113 /* ************** */ /* ****************** */
114 &ABL, /**/ &ABR, A20, A21, /**/ A22,
115 FLA_TL );
117 U1,
118 /* ** */ /* ** */
119 &UB, U2, FLA_TOP );
121 Z1,
122 /* ** */ /* ** */
123 &ZB, Z2, FLA_TOP );
124 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
125 FLA_LEFT );
126 }
127
128 FLA_Obj_free( &U );
129 FLA_Obj_free( &Z );
130
131 return FLA_SUCCESS;
132}
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition FLA_Tridiag_UT_l_fus_var3.c:27
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy_external.c:13
FLA_Error FLA_Her2k_external(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition FLA_Her2k_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition FLA_Obj.c:55
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition FLA_Triangularize.c:13
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Set.c:13

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Her2k_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Tridiag_UT_l_step_ofu_var3(), FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blk_var1()

FLA_Error FLA_Tridiag_UT_l_blk_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TL, TR, T0, T1, T2;
19
22 dim_t b_alg, b;
23
25
26 FLA_Part_2x2( A, &ATL, &ATR,
27 &ABL, &ABR, 0, 0, FLA_TL );
28 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
29
30 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
31 {
32 b = min( FLA_Obj_length( ABR ), b_alg );
33
34 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
35 /* ************* */ /* ******************** */
36 &A10, /**/ &A11, &A12,
37 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
38 b, b, FLA_BR );
39 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
40 b, FLA_RIGHT );
41
42 /*------------------------------------------------------------*/
43
45 &none2, &none3, b, b, FLA_TL );
46
47 // [ ABR, T1 ] = FLA_Tridiag_UT_l_step_unb_var1( ABR, T1, b );
48 //FLA_Tridiag_UT_l_step_unb_var1( ABR, T1_tl );
49 //FLA_Tridiag_UT_l_step_ofu_var1( ABR, T1_tl );
51
52 /*------------------------------------------------------------*/
53
54 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
55 A10, A11, /**/ A12,
56 /* ************** */ /* ****************** */
57 &ABL, /**/ &ABR, A20, A21, /**/ A22,
58 FLA_TL );
59 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
60 FLA_LEFT );
61 }
62
63 return FLA_SUCCESS;
64}
FLA_Error FLA_Tridiag_UT_l_step_opt_var1(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var1.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Tridiag_UT_l_step_opt_var1(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blk_var2()

FLA_Error FLA_Tridiag_UT_l_blk_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj TL, TR, T0, T1, T2;
19
22 dim_t b_alg, b;
23
25
26 FLA_Part_2x2( A, &ATL, &ATR,
27 &ABL, &ABR, 0, 0, FLA_TL );
28 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
29
30 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
31 {
32 b = min( FLA_Obj_length( ABR ), b_alg );
33
34 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
35 /* ************* */ /* ******************** */
36 &A10, /**/ &A11, &A12,
37 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
38 b, b, FLA_BR );
39 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
40 b, FLA_RIGHT );
41
42 /*------------------------------------------------------------*/
43
45 &none2, &none3, b, b, FLA_TL );
46
47 // [ ABR, T1 ] = FLA_Tridiag_UT_l_step_unb_var2( ABR, T1, b );
48 //FLA_Tridiag_UT_l_step_unb_var2( ABR, T1_tl );
49 //FLA_Tridiag_UT_l_step_ofu_var2( ABR, T1_tl );
51
52 /*------------------------------------------------------------*/
53
54 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
55 A10, A11, /**/ A12,
56 /* ************** */ /* ****************** */
57 &ABL, /**/ &ABR, A20, A21, /**/ A22,
58 FLA_TL );
59 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
60 FLA_LEFT );
61 }
62
63 return FLA_SUCCESS;
64}
FLA_Error FLA_Tridiag_UT_l_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var2.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Tridiag_UT_l_step_opt_var2(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blk_var3()

FLA_Error FLA_Tridiag_UT_l_blk_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
15 FLA_Obj ATL, ATR, A00, A01, A02,
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18 FLA_Obj UT, U0,
19 UB, U1,
20 U2;
21 FLA_Obj ZT, Z0,
22 ZB, Z1,
23 Z2;
24 FLA_Obj TL, TR, T0, T1, T2;
25
26 FLA_Obj U, Z;
33 UB_bl;
35 dim_t m_A;
36 dim_t b_alg, b, bb;
37
39
41 m_A = FLA_Obj_length( A );
42
43 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44 FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45
46 FLA_Part_2x2( A, &ATL, &ATR,
47 &ABL, &ABR, 0, 0, FLA_TL );
48 FLA_Part_2x1( U, &UT,
49 &UB, 0, FLA_TOP );
50 FLA_Part_2x1( Z, &ZT,
51 &ZB, 0, FLA_TOP );
52 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53
54 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55 {
56 b = min( FLA_Obj_length( ABR ), b_alg );
57
58 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59 /* ************* */ /* ******************** */
60 &A10, /**/ &A11, &A12,
61 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62 b, b, FLA_BR );
64 /* ** */ /* ** */
65 &U1,
66 UB, &U2, b, FLA_BOTTOM );
68 /* ** */ /* ** */
69 &Z1,
70 ZB, &Z2, b, FLA_BOTTOM );
71 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72 b, FLA_RIGHT );
73
74 /*------------------------------------------------------------*/
75
77 &none2, &none3, b, b, FLA_TL );
78
79 bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80
84
86 &U2_l, b, FLA_TOP );
88 &Z2_l, b, FLA_TOP );
89
90 // [ ABR, ZB, T1 ] = FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1, b );
91 //FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1_tl );
92 //FLA_Tridiag_UT_l_step_ofu_var3( ABR, ZB, T1_tl );
94
95 if ( FLA_Obj_length( A22 ) > 0 )
96 {
97 // Build UB from ABR, with explicit unit subdiagonal and zeros.
100 &UB_bl, 1, FLA_TOP );
103
104 // A22 = A22 - U2 * Z2' - Z2 * U2';
107 }
108
109 /*------------------------------------------------------------*/
110
111 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112 A10, A11, /**/ A12,
113 /* ************** */ /* ****************** */
114 &ABL, /**/ &ABR, A20, A21, /**/ A22,
115 FLA_TL );
117 U1,
118 /* ** */ /* ** */
119 &UB, U2, FLA_TOP );
121 Z1,
122 /* ** */ /* ** */
123 &ZB, Z2, FLA_TOP );
124 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
125 FLA_LEFT );
126 }
127
128 FLA_Obj_free( &U );
129 FLA_Obj_free( &Z );
130
131 return FLA_SUCCESS;
132}
FLA_Error FLA_Tridiag_UT_l_step_opt_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var3.c:27

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Her2k_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Tridiag_UT_l_step_opt_var3(), FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_ofu_var1()

FLA_Error FLA_Tridiag_UT_l_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Tridiag_UT_l_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16}
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_fus_var2.c:18

References FLA_Tridiag_UT_l_step_ofu_var2(), and i.

◆ FLA_Tridiag_UT_l_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Z;
17
19
21
22 FLA_Obj_free( &Z );
23
24 return r_val;
25}
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition FLA_Tridiag_UT_l_fus_var3.c:27
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition FLA_Obj.c:286
int FLA_Error
Definition FLA_type_defs.h:47

References FLA_Obj_create_conf_to(), FLA_Obj_free(), FLA_Tridiag_UT_l_step_ofu_var3(), and i.

◆ FLA_Tridiag_UT_l_opt_var1()

FLA_Error FLA_Tridiag_UT_l_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16}
FLA_Error FLA_Tridiag_UT_l_step_opt_var1(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var1.c:18

References FLA_Tridiag_UT_l_step_opt_var1(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_opt_var2()

FLA_Error FLA_Tridiag_UT_l_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16}
FLA_Error FLA_Tridiag_UT_l_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var2.c:18

References FLA_Tridiag_UT_l_step_opt_var2(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_opt_var3()

FLA_Error FLA_Tridiag_UT_l_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Z;
17
19
21
22 FLA_Obj_free( &Z );
23
24 return r_val;
25}
FLA_Error FLA_Tridiag_UT_l_step_opt_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition FLA_Tridiag_UT_l_opt_var3.c:27

References FLA_Obj_create_conf_to(), FLA_Obj_free(), FLA_Tridiag_UT_l_step_opt_var3(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_step_ofc_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var1 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofc_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
557{
562
569 int i;
570
571 // b_alg = FLA_Obj_length( T );
572 int b_alg = m_T;
573
574 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
575 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
576 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
577 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
578 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
579 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
580 int inc_u = 1;
581 int inc_z = 1;
582 int inc_w = 1;
583
584 // Initialize some variables (only to prevent compiler warnings).
587
588 for ( i = 0; i < b_alg; ++i )
589 {
590 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
591 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
592 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
593 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
594
595 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
596 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
597
599 scomplex* u21 = buff_u + (i+1)*inc_u;
600
601 scomplex* zeta11 = buff_z + (i )*inc_z;
602 scomplex* z21 = buff_z + (i+1)*inc_z;
603
604 scomplex* w21 = buff_w + (i+1)*inc_w;
605
606 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
607 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
608
609 int m_ahead = m_A - i - 1;
610 int m_behind = i;
611 int n_behind = i;
612
613 /*------------------------------------------------------------*/
614
615 if ( m_behind > 0 )
616 {
617 // FLA_Copy( upsilon11, minus_upsilon11 );
618 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
619 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
622
623 // FLA_Copy( zeta11, minus_zeta11 );
624 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
625 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
628
629 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
630 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
632 1,
634 zeta11, 1,
635 alpha11, 1 );
637 1,
639 upsilon11, 1,
640 alpha11, 1 );
641
642 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
643 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
645 m_ahead,
647 u21, inc_u,
648 a21, rs_A );
650 m_ahead,
652 z21, inc_z,
653 a21, rs_A );
654 }
655
656 if ( m_ahead > 0 )
657 {
658 // FLA_Househ2_UT( FLA_LEFT,
659 // a21_t,
660 // a21_b, tau11 );
662 a21_t,
663 a21_b, rs_A,
664 tau11 );
665
666 // FLA_Set( FLA_ONE, inv_tau11 );
667 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
668 // FLA_Copy( inv_tau11, minus_inv_tau11 );
669 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
672
673 // FLA_Copy( a21_t, first_elem );
674 // FLA_Set( FLA_ONE, a21_t );
675 first_elem = *a21_t;
676 *a21_t = *buff_1;
677 }
678
679 if ( m_behind > 0 && m_ahead > 0 )
680 {
681 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
682 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
684 buff_m1,
685 u21, inc_u,
686 z21, inc_z,
687 A22, rs_A, cs_A,
688 a21, rs_A,
689 w21, inc_w );
690 }
691 else if ( m_ahead > 0 )
692 {
693 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
696 m_ahead,
697 buff_1,
698 A22, rs_A, cs_A,
699 a21, rs_A,
700 buff_0,
701 w21, inc_w );
702 }
703
704 if ( m_ahead > 0 )
705 {
706 // FLA_Copy( a21, u21 );
707 // FLA_Copy( w21, z21 );
709 m_ahead,
710 a21, rs_A,
711 u21, inc_u );
713 m_ahead,
714 w21, inc_w,
715 z21, inc_z );
716
717 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
718 // FLA_Inv_scal( FLA_TWO, beta );
720 m_ahead,
721 a21, rs_A,
722 z21, inc_z,
723 &beta );
725
726 // FLA_Scal( minus_inv_tau11, beta );
727 // FLA_Axpy( beta, a21, z21 );
728 // FLA_Scal( inv_tau11, z21 );
731 m_ahead,
732 &beta,
733 a21, rs_A,
734 z21, inc_z );
736 m_ahead,
737 &inv_tau11,
738 z21, inc_z );
739
740 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
743 m_ahead,
744 n_behind,
745 buff_1,
746 A20, rs_A, cs_A,
747 a21, rs_A,
748 buff_0,
749 t01, rs_T );
750
751 // FLA_Copy( first_elem, a21_t );
752 *a21_t = first_elem;
753 }
754
755 if ( m_behind + 1 == b_alg && m_ahead > 0 )
756 {
757 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
760 m_ahead,
761 buff_m1,
762 u21, inc_u,
763 z21, inc_z,
764 A22, rs_A, cs_A );
765 }
766
767 /*------------------------------------------------------------*/
768
769 }
770
771 // FLA_Obj_free( &u );
772 // FLA_Obj_free( &z );
773 // FLA_Obj_free( &w );
774 FLA_free( buff_u );
775 FLA_free( buff_z );
776 FLA_free( buff_w );
777
778 return FLA_SUCCESS;
779}
FLA_Obj FLA_TWO
Definition FLA_Init.c:17
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_hemv.c:35
void bl1_cher2(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_her2.c:33
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
@ BLIS1_LOWER_TRIANGULAR
Definition blis_type_defs.h:62
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofc_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
469{
474
479 int i;
480
481 // b_alg = FLA_Obj_length( T );
482 int b_alg = m_T;
483
484 // FLA_Set( FLA_ZERO, Z );
485 bl1_csetm( m_A,
486 b_alg,
487 buff_0,
488 buff_Z, rs_Z, cs_Z );
489
490 for ( i = 0; i < b_alg; ++i )
491 {
492 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
493 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
494 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
495 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
496 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
497
498 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
499 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
500 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
501
502 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504
505 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
506
507 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
508 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
509
510 scomplex* ABL = a10t;
511 scomplex* ZBL = z10t;
512
514
515 int m_ahead = m_A - i - 1;
516 int m_behind = i;
517 int n_behind = i;
518
519 /*------------------------------------------------------------*/
520
521 if ( m_behind > 0 )
522 {
523 // FLA_Copy( a10t_r, last_elem );
524 // FLA_Set( FLA_ONE, a10t_r );
525 last_elem = *a10t_r;
526 *a10t_r = *buff_1;
527 }
528
529 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
530 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
533 m_ahead + 1,
534 n_behind,
535 buff_m1,
536 ABL, rs_A, cs_A,
537 z10t, cs_Z,
538 buff_1,
539 a2, rs_A );
542 m_ahead + 1,
543 n_behind,
544 buff_m1,
545 ZBL, rs_Z, cs_Z,
546 a10t, cs_A,
547 buff_1,
548 a2, rs_A );
549
550 if ( m_behind > 0 )
551 {
552 // FLA_Copy( last_elem, a10t_r );
553 *a10t_r = last_elem;
554 }
555
556 if ( m_ahead > 0 )
557 {
558 // FLA_Househ2_UT( FLA_LEFT,
559 // a21_t,
560 // a21_b, tau11 );
562 a21_t,
563 a21_b, rs_A,
564 tau11 );
565
566 // FLA_Set( FLA_ONE, inv_tau11 );
567 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
568 // FLA_Copy( inv_tau11, minus_inv_tau11 );
569 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
572
573 // FLA_Copy( a21_t, first_elem );
574 // FLA_Set( FLA_ONE, a21_t );
575 first_elem = *a21_t;
576 *a21_t = *buff_1;
577
578 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
581 m_ahead,
582 buff_1,
583 A22, rs_A, cs_A,
584 a21, rs_A,
585 buff_0,
586 z21, rs_Z );
587
588 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
589 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
590 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
591 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
592 // FLA_Copy( d01, t01 );
594 n_behind,
595 buff_m1,
596 A20, rs_A, cs_A,
597 Z20, rs_Z, cs_Z,
598 t01, rs_T,
599 a21, rs_A,
600 z21, rs_Z );
601
602 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
603 // FLA_Inv_scal( FLA_TWO, beta );
605 m_ahead,
606 a21, rs_A,
607 z21, rs_Z,
608 &beta );
610
611 // FLA_Scal( minus_inv_tau11, beta );
612 // FLA_Axpy( beta, a21, z21 );
613 // FLA_Scal( inv_tau11, z21 );
616 m_ahead,
617 &beta,
618 a21, rs_A,
619 z21, rs_Z );
621 m_ahead,
622 &inv_tau11,
623 z21, rs_Z );
624
625 // FLA_Copy( first_elem, a21_t );
626 *a21_t = first_elem;
627 }
628
629 /*------------------------------------------------------------*/
630
631 }
632
633 return FLA_SUCCESS;
634}
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofd_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var1 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofd_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
329{
330 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
331 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
332 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
333 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
334
335 double first_elem;
336 double beta;
337 double inv_tau11;
338 double minus_inv_tau11;
341 int i;
342
343 // b_alg = FLA_Obj_length( T );
344 int b_alg = m_T;
345
346 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
347 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
348 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
349 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
350 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
351 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
352 int inc_u = 1;
353 int inc_z = 1;
354 int inc_w = 1;
355
356 // Initialize some variables (only to prevent compiler warnings).
359
360 for ( i = 0; i < b_alg; ++i )
361 {
362 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
363 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
364 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
365 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
366
367 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
368 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
369
370 double* upsilon11= buff_u + (i )*inc_u;
371 double* u21 = buff_u + (i+1)*inc_u;
372
373 double* zeta11 = buff_z + (i )*inc_z;
374 double* z21 = buff_z + (i+1)*inc_z;
375
376 double* w21 = buff_w + (i+1)*inc_w;
377
378 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
379 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
380
381 int m_ahead = m_A - i - 1;
382 int m_behind = i;
383 int n_behind = i;
384
385 /*------------------------------------------------------------*/
386
387 if ( m_behind > 0 )
388 {
389 // FLA_Copy( upsilon11, minus_upsilon11 );
390 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
391 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
394
395 // FLA_Copy( zeta11, minus_zeta11 );
396 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
397 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
400
401 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
402 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
404 1,
406 zeta11, 1,
407 alpha11, 1 );
409 1,
411 upsilon11, 1,
412 alpha11, 1 );
413
414 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
415 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
417 m_ahead,
419 u21, inc_u,
420 a21, rs_A );
422 m_ahead,
424 z21, inc_z,
425 a21, rs_A );
426 }
427
428 if ( m_ahead > 0 )
429 {
430 // FLA_Househ2_UT( FLA_LEFT,
431 // a21_t,
432 // a21_b, tau11 );
434 a21_t,
435 a21_b, rs_A,
436 tau11 );
437
438 // FLA_Set( FLA_ONE, inv_tau11 );
439 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
440 // FLA_Copy( inv_tau11, minus_inv_tau11 );
441 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
444
445 // FLA_Copy( a21_t, first_elem );
446 // FLA_Set( FLA_ONE, a21_t );
447 first_elem = *a21_t;
448 *a21_t = *buff_1;
449 }
450
451 if ( m_behind > 0 && m_ahead > 0 )
452 {
453 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
454 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
456 buff_m1,
457 u21, inc_u,
458 z21, inc_z,
459 A22, rs_A, cs_A,
460 a21, rs_A,
461 w21, inc_w );
462 }
463 else if ( m_ahead > 0 )
464 {
465 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
467 m_ahead,
468 buff_1,
469 A22, rs_A, cs_A,
470 a21, rs_A,
471 buff_0,
472 w21, inc_w );
473 }
474
475 if ( m_ahead > 0 )
476 {
477 // FLA_Copy( a21, u21 );
478 // FLA_Copy( w21, z21 );
480 m_ahead,
481 a21, rs_A,
482 u21, inc_u );
484 m_ahead,
485 w21, inc_w,
486 z21, inc_z );
487
488 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
489 // FLA_Inv_scal( FLA_TWO, beta );
491 m_ahead,
492 a21, rs_A,
493 z21, inc_z,
494 &beta );
496
497 // FLA_Scal( minus_inv_tau11, beta );
498 // FLA_Axpy( beta, a21, z21 );
499 // FLA_Scal( inv_tau11, z21 );
502 m_ahead,
503 &beta,
504 a21, rs_A,
505 z21, inc_z );
507 m_ahead,
508 &inv_tau11,
509 z21, inc_z );
510
511 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
514 m_ahead,
515 n_behind,
516 buff_1,
517 A20, rs_A, cs_A,
518 a21, rs_A,
519 buff_0,
520 t01, rs_T );
521
522 // FLA_Copy( first_elem, a21_t );
523 *a21_t = first_elem;
524 }
525
526 if ( m_behind + 1 == b_alg && m_ahead > 0 )
527 {
528 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
530 m_ahead,
531 buff_m1,
532 u21, inc_u,
533 z21, inc_z,
534 A22, rs_A, cs_A );
535 }
536
537 /*------------------------------------------------------------*/
538
539 }
540
541 // FLA_Obj_free( &u );
542 // FLA_Obj_free( &z );
543 // FLA_Obj_free( &w );
544 FLA_free( buff_u );
545 FLA_free( buff_z );
546 FLA_free( buff_w );
547
548 return FLA_SUCCESS;
549}
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_symv.c:56
void bl1_dsyr2(uplo1_t uplo, int m, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_syr2.c:58

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofd_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_T,
int  rs_T,
int  cs_T 
)
296{
297 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
298 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
299 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
300 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
301
302 double first_elem, last_elem;
303 double beta;
304 double inv_tau11;
305 double minus_inv_tau11;
306 int i;
307
308 // b_alg = FLA_Obj_length( T );
309 int b_alg = m_T;
310
311 // FLA_Set( FLA_ZERO, Z );
312 bl1_dsetm( m_A,
313 b_alg,
314 buff_0,
315 buff_Z, rs_Z, cs_Z );
316
317 for ( i = 0; i < b_alg; ++i )
318 {
319 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
320 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
321 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
322 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
323 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
324
325 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
326 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
327 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
328
329 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
330 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
331
332 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
333
334 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
335 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
336
337 double* ABL = a10t;
338 double* ZBL = z10t;
339
340 double* a2 = alpha11;
341
342 int m_ahead = m_A - i - 1;
343 int m_behind = i;
344 int n_behind = i;
345
346 /*------------------------------------------------------------*/
347
348 if ( m_behind > 0 )
349 {
350 // FLA_Copy( a10t_r, last_elem );
351 // FLA_Set( FLA_ONE, a10t_r );
352 last_elem = *a10t_r;
353 *a10t_r = *buff_1;
354 }
355
356 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
357 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
358 bl1_dgemv( BLIS1_NO_TRANSPOSE,
359 BLIS1_CONJUGATE,
360 m_ahead + 1,
361 n_behind,
362 buff_m1,
363 ABL, rs_A, cs_A,
364 z10t, cs_Z,
365 buff_1,
366 a2, rs_A );
367 bl1_dgemv( BLIS1_NO_TRANSPOSE,
368 BLIS1_CONJUGATE,
369 m_ahead + 1,
370 n_behind,
371 buff_m1,
372 ZBL, rs_Z, cs_Z,
373 a10t, cs_A,
374 buff_1,
375 a2, rs_A );
376
377 if ( m_behind > 0 )
378 {
379 // FLA_Copy( last_elem, a10t_r );
380 *a10t_r = last_elem;
381 }
382
383 if ( m_ahead > 0 )
384 {
385 // FLA_Househ2_UT( FLA_LEFT,
386 // a21_t,
387 // a21_b, tau11 );
389 a21_t,
390 a21_b, rs_A,
391 tau11 );
392
393 // FLA_Set( FLA_ONE, inv_tau11 );
394 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
395 // FLA_Copy( inv_tau11, minus_inv_tau11 );
396 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
399
400 // FLA_Copy( a21_t, first_elem );
401 // FLA_Set( FLA_ONE, a21_t );
402 first_elem = *a21_t;
403 *a21_t = *buff_1;
404
405 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
407 m_ahead,
408 buff_1,
409 A22, rs_A, cs_A,
410 a21, rs_A,
411 buff_0,
412 z21, rs_Z );
413
414 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
415 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
416 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
417 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
418 // FLA_Copy( d01, t01 );
420 n_behind,
421 buff_m1,
422 A20, rs_A, cs_A,
423 Z20, rs_Z, cs_Z,
424 t01, rs_T,
425 a21, rs_A,
426 z21, rs_Z );
427
428 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
429 // FLA_Inv_scal( FLA_TWO, beta );
431 m_ahead,
432 a21, rs_A,
433 z21, rs_Z,
434 &beta );
436
437 // FLA_Scal( minus_inv_tau11, beta );
438 // FLA_Axpy( beta, a21, z21 );
439 // FLA_Scal( inv_tau11, z21 );
442 m_ahead,
443 &beta,
444 a21, rs_A,
445 z21, rs_Z );
447 m_ahead,
448 &inv_tau11,
449 z21, rs_Z );
450
451 // FLA_Copy( first_elem, a21_t );
452 *a21_t = first_elem;
453 }
454
455 /*------------------------------------------------------------*/
456
457 }
458
459 return FLA_SUCCESS;
460}
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition bl1_setm.c:45

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsetm(), bl1_dsymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofs_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var1 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofs_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106
107 float first_elem;
108 float beta;
109 float inv_tau11;
110 float minus_inv_tau11;
113 int i;
114
115 // b_alg = FLA_Obj_length( T );
116 int b_alg = m_T;
117
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
119 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
121 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
122 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
123 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
124 int inc_u = 1;
125 int inc_z = 1;
126 int inc_w = 1;
127
128 // Initialize some variables (only to prevent compiler warnings).
131
132 for ( i = 0; i < b_alg; ++i )
133 {
134 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
135 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
136 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
137 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138
139 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
140 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
141
142 float* upsilon11= buff_u + (i )*inc_u;
143 float* u21 = buff_u + (i+1)*inc_u;
144
145 float* zeta11 = buff_z + (i )*inc_z;
146 float* z21 = buff_z + (i+1)*inc_z;
147
148 float* w21 = buff_w + (i+1)*inc_w;
149
150 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
151 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
152
153 int m_ahead = m_A - i - 1;
154 int m_behind = i;
155 int n_behind = i;
156
157 /*------------------------------------------------------------*/
158
159 if ( m_behind > 0 )
160 {
161 // FLA_Copy( upsilon11, minus_upsilon11 );
162 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
163 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
166
167 // FLA_Copy( zeta11, minus_zeta11 );
168 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
169 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
172
173 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
174 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
176 1,
178 zeta11, 1,
179 alpha11, 1 );
181 1,
183 upsilon11, 1,
184 alpha11, 1 );
185
186 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
187 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
189 m_ahead,
191 u21, inc_u,
192 a21, rs_A );
194 m_ahead,
196 z21, inc_z,
197 a21, rs_A );
198 }
199
200 if ( m_ahead > 0 )
201 {
202 // FLA_Househ2_UT( FLA_LEFT,
203 // a21_t,
204 // a21_b, tau11 );
206 a21_t,
207 a21_b, rs_A,
208 tau11 );
209
210 // FLA_Set( FLA_ONE, inv_tau11 );
211 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
212 // FLA_Copy( inv_tau11, minus_inv_tau11 );
213 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
216
217 // FLA_Copy( a21_t, first_elem );
218 // FLA_Set( FLA_ONE, a21_t );
219 first_elem = *a21_t;
220 *a21_t = *buff_1;
221 }
222
223 if ( m_behind > 0 && m_ahead > 0 )
224 {
225 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
226 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
228 buff_m1,
229 u21, inc_u,
230 z21, inc_z,
231 A22, rs_A, cs_A,
232 a21, rs_A,
233 w21, inc_w );
234 }
235 else if ( m_ahead > 0 )
236 {
237 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
239 m_ahead,
240 buff_1,
241 A22, rs_A, cs_A,
242 a21, rs_A,
243 buff_0,
244 w21, inc_w );
245 }
246
247 if ( m_ahead > 0 )
248 {
249 // FLA_Copy( a21, u21 );
250 // FLA_Copy( w21, z21 );
252 m_ahead,
253 a21, rs_A,
254 u21, inc_u );
256 m_ahead,
257 w21, inc_w,
258 z21, inc_z );
259
260 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
261 // FLA_Inv_scal( FLA_TWO, beta );
263 m_ahead,
264 a21, rs_A,
265 z21, inc_z,
266 &beta );
268
269 // FLA_Scal( minus_inv_tau11, beta );
270 // FLA_Axpy( beta, a21, z21 );
271 // FLA_Scal( inv_tau11, z21 );
274 m_ahead,
275 &beta,
276 a21, rs_A,
277 z21, inc_z );
279 m_ahead,
280 &inv_tau11,
281 z21, inc_z );
282
283 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
286 m_ahead,
287 n_behind,
288 buff_1,
289 A20, rs_A, cs_A,
290 a21, rs_A,
291 buff_0,
292 t01, rs_T );
293
294 // FLA_Copy( first_elem, a21_t );
295 *a21_t = first_elem;
296 }
297
298 if ( m_behind + 1 == b_alg && m_ahead > 0 )
299 {
300 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
302 m_ahead,
303 buff_m1,
304 u21, inc_u,
305 z21, inc_z,
306 A22, rs_A, cs_A );
307 }
308
309 /*------------------------------------------------------------*/
310
311 }
312
313 // FLA_Obj_free( &u );
314 // FLA_Obj_free( &z );
315 // FLA_Obj_free( &w );
316 FLA_free( buff_u );
317 FLA_free( buff_z );
318 FLA_free( buff_w );
319
320 return FLA_SUCCESS;
321}
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_symv.c:13
void bl1_ssyr2(uplo1_t uplo, int m, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_syr2.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofs_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T 
)
123{
124 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
125 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
126 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
127 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
128
129 float first_elem, last_elem;
130 float beta;
131 float inv_tau11;
132 float minus_inv_tau11;
133 int i;
134
135 // b_alg = FLA_Obj_length( T );
136 int b_alg = m_T;
137
138 // FLA_Set( FLA_ZERO, Z );
139 bl1_ssetm( m_A,
140 b_alg,
141 buff_0,
142 buff_Z, rs_Z, cs_Z );
143
144 for ( i = 0; i < b_alg; ++i )
145 {
146 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
147 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
148 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
149 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
150 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
151
152 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
153 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
154 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
155
156 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
157 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
158
159 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
160
161 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
162 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
163
164 float* ABL = a10t;
165 float* ZBL = z10t;
166
167 float* a2 = alpha11;
168
169 int m_ahead = m_A - i - 1;
170 int m_behind = i;
171 int n_behind = i;
172
173 /*------------------------------------------------------------*/
174
175 if ( m_behind > 0 )
176 {
177 // FLA_Copy( a10t_r, last_elem );
178 // FLA_Set( FLA_ONE, a10t_r );
179 last_elem = *a10t_r;
180 *a10t_r = *buff_1;
181 }
182
183 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
184 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
185 bl1_sgemv( BLIS1_NO_TRANSPOSE,
186 BLIS1_CONJUGATE,
187 m_ahead + 1,
188 n_behind,
189 buff_m1,
190 ABL, rs_A, cs_A,
191 z10t, cs_Z,
192 buff_1,
193 a2, rs_A );
194 bl1_sgemv( BLIS1_NO_TRANSPOSE,
195 BLIS1_CONJUGATE,
196 m_ahead + 1,
197 n_behind,
198 buff_m1,
199 ZBL, rs_Z, cs_Z,
200 a10t, cs_A,
201 buff_1,
202 a2, rs_A );
203
204 if ( m_behind > 0 )
205 {
206 // FLA_Copy( last_elem, a10t_r );
207 *a10t_r = last_elem;
208 }
209
210 if ( m_ahead > 0 )
211 {
212 // FLA_Househ2_UT( FLA_LEFT,
213 // a21_t,
214 // a21_b, tau11 );
216 a21_t,
217 a21_b, rs_A,
218 tau11 );
219
220 // FLA_Set( FLA_ONE, inv_tau11 );
221 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
222 // FLA_Copy( inv_tau11, minus_inv_tau11 );
223 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
226
227 // FLA_Copy( a21_t, first_elem );
228 // FLA_Set( FLA_ONE, a21_t );
229 first_elem = *a21_t;
230 *a21_t = *buff_1;
231
232 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
234 m_ahead,
235 buff_1,
236 A22, rs_A, cs_A,
237 a21, rs_A,
238 buff_0,
239 z21, rs_Z );
240
241 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
242 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
243 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
244 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
245 // FLA_Copy( d01, t01 );
247 n_behind,
248 buff_m1,
249 A20, rs_A, cs_A,
250 Z20, rs_Z, cs_Z,
251 t01, rs_T,
252 a21, rs_A,
253 z21, rs_Z );
254
255 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
256 // FLA_Inv_scal( FLA_TWO, beta );
258 m_ahead,
259 a21, rs_A,
260 z21, rs_Z,
261 &beta );
263
264 // FLA_Scal( minus_inv_tau11, beta );
265 // FLA_Axpy( beta, a21, z21 );
266 // FLA_Scal( inv_tau11, z21 );
269 m_ahead,
270 &beta,
271 a21, rs_A,
272 z21, rs_Z );
274 m_ahead,
275 &inv_tau11,
276 z21, rs_Z );
277
278 // FLA_Copy( first_elem, a21_t );
279 *a21_t = first_elem;
280 }
281
282 /*------------------------------------------------------------*/
283
284 }
285
286 return FLA_SUCCESS;
287}
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition bl1_setm.c:29

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssetm(), bl1_ssymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofu_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Tridiag_UT_l_step_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
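30 rs_A = FLA_Obj_row_stride( A );
31 cs_A = FLA_Obj_col_stride( A );
32
33 rs_T = FLA_Obj_row_stride( T );
34 cs_T = FLA_Obj_col_stride( T );
35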
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
44 FLA_Tridiag_UT_l_step_ofs_var2( m_A,
45 m_T,
46 buff_A, rs_A, cs_A,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
57 FLA_Tridiag_UT_l_step_ofd_var2( m_A,
58 m_T,
59 buff_A, rs_A, cs_A,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
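67 scomplex* buff_A = FLA_COMPLEX_PTR( A );
68 scomplex* buff_T = FLA_COMPLEX_PTR( T );
69
70 FLA_Tridiag_UT_l_step_ofc_var2( m_A,
71 m_T,
72 buff_A, rs_A, cs_A,
73 buff_T, rs_T, cs_T );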
74
75 break;
76 }
77
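78 case FLA_DOUBLE_COMPLEX:
79 {
80 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82
83 FLA_Tridiag_UT_l_step_ofz_var2( m_A,
84 m_T,
85 buff_A, rs_A, cs_A,
86 buff_T, rs_T, cs_T );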
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:325
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:97
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:783
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var2.c:553

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofz_var2(), and i.

Referenced by FLA_Tridiag_UT_l_blf_var2(), and FLA_Tridiag_UT_l_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
28{
29 FLA_Datatype datatype;
30 int m_A, m_T;
31 int rs_A, cs_A;
32 int rs_Z, cs_Z;
33 int rs_T, cs_T;
34
35 datatype = FLA_Obj_datatype( A );
36
37 m_A = FLA_Obj_length( A );
38 m_T = FLA_Obj_length( T );
39
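40 rs_A = FLA_Obj_row_stride( A );
41 cs_A = FLA_Obj_col_stride( A );
42
43 rs_Z = FLA_Obj_row_stride( Z );
44 cs_Z = FLA_Obj_col_stride( Z );
45
46 rs_T = FLA_Obj_row_stride( T );
47 cs_T = FLA_Obj_col_stride( T );
48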
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
54 float* buff_A = FLA_FLOAT_PTR( A );
55 float* buff_Z = FLA_FLOAT_PTR( Z );
56 float* buff_T = FLA_FLOAT_PTR( T );
57
58 FLA_Tridiag_UT_l_step_ofs_var3( m_A,
59 m_T,
60 buff_A, rs_A, cs_A,
61 buff_Z, rs_Z, cs_Z,
62 buff_T, rs_T, cs_T );
63
64 break;
65 }
66
67 case FLA_DOUBLE:
68 {
69 double* buff_A = FLA_DOUBLE_PTR( A );
70 double* buff_Z = FLA_DOUBLE_PTR( Z );
71 double* buff_T = FLA_DOUBLE_PTR( T );
72
73 FLA_Tridiag_UT_l_step_ofd_var3( m_A,
74 m_T,
75 buff_A, rs_A, cs_A,
76 buff_Z, rs_Z, cs_Z,
77 buff_T, rs_T, cs_T );
78
79 break;
80 }
81
82 case FLA_COMPLEX:
83 {
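84 scomplex* buff_A = FLA_COMPLEX_PTR( A );
85 scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
86 scomplex* buff_T = FLA_COMPLEX_PTR( T );
87
88 FLA_Tridiag_UT_l_step_ofc_var3( m_A,
89 m_T,
90 buff_A, rs_A, cs_A,
91 buff_Z, rs_Z, cs_Z,
92 buff_T, rs_T, cs_T );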
93
94 break;
95 }
96
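97 case FLA_DOUBLE_COMPLEX:
98 {
99 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
100 dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
101 dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
102
103 FLA_Tridiag_UT_l_step_ofz_var3( m_A,
104 m_T,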
105 buff_A, rs_A, cs_A,
106 buff_Z, rs_Z, cs_Z,
107 buff_T, rs_T, cs_T );
108
109 break;
110 }
111 }
112
113 return FLA_SUCCESS;
114}
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:464
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:291
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:638
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_fus_var3.c:118

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ofz_var3(), and i.

Referenced by FLA_Tridiag_UT_l_blf_var3(), and FLA_Tridiag_UT_l_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofz_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var1 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofz_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
787{
792
799 int i;
800
801 // b_alg = FLA_Obj_length( T );
802 int b_alg = m_T;
803
804 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
805 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
806 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
807 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
808 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
809 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
810 int inc_u = 1;
811 int inc_z = 1;
812 int inc_w = 1;
813
814 // Initialize some variables (only to prevent compiler warnings).
817
818 for ( i = 0; i < b_alg; ++i )
819 {
820 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
821 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
822 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
823 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
824
825 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
826 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
827
829 dcomplex* u21 = buff_u + (i+1)*inc_u;
830
831 dcomplex* zeta11 = buff_z + (i )*inc_z;
832 dcomplex* z21 = buff_z + (i+1)*inc_z;
833
834 dcomplex* w21 = buff_w + (i+1)*inc_w;
835
836 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
837 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
838
839 int m_ahead = m_A - i - 1;
840 int m_behind = i;
841 int n_behind = i;
842
843 /*------------------------------------------------------------*/
844
845 if ( m_behind > 0 )
846 {
847 // FLA_Copy( upsilon11, minus_upsilon11 );
848 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
849 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
852
853 // FLA_Copy( zeta11, minus_zeta11 );
854 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
855 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
858
859 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
860 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
862 1,
864 zeta11, 1,
865 alpha11, 1 );
867 1,
869 upsilon11, 1,
870 alpha11, 1 );
871
872 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
873 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
875 m_ahead,
877 u21, inc_u,
878 a21, rs_A );
880 m_ahead,
882 z21, inc_z,
883 a21, rs_A );
884 }
885
886 if ( m_ahead > 0 )
887 {
888 // FLA_Househ2_UT( FLA_LEFT,
889 // a21_t,
890 // a21_b, tau11 );
892 a21_t,
893 a21_b, rs_A,
894 tau11 );
895
896 // FLA_Set( FLA_ONE, inv_tau11 );
897 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
898 // FLA_Copy( inv_tau11, minus_inv_tau11 );
899 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
902
903 // FLA_Copy( a21_t, first_elem );
904 // FLA_Set( FLA_ONE, a21_t );
905 first_elem = *a21_t;
906 *a21_t = *buff_1;
907 }
908
909 if ( m_behind > 0 && m_ahead > 0 )
910 {
911 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
912 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
914 buff_m1,
915 u21, inc_u,
916 z21, inc_z,
917 A22, rs_A, cs_A,
918 a21, rs_A,
919 w21, inc_w );
920 }
921 else if ( m_ahead > 0 )
922 {
923 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
926 m_ahead,
927 buff_1,
928 A22, rs_A, cs_A,
929 a21, rs_A,
930 buff_0,
931 w21, inc_w );
932 }
933
934 if ( m_ahead > 0 )
935 {
936 // FLA_Copy( a21, u21 );
937 // FLA_Copy( w21, z21 );
939 m_ahead,
940 a21, rs_A,
941 u21, inc_u );
943 m_ahead,
944 w21, inc_w,
945 z21, inc_z );
946
947 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
948 // FLA_Inv_scal( FLA_TWO, beta );
950 m_ahead,
951 a21, rs_A,
952 z21, inc_z,
953 &beta );
955
956 // FLA_Scal( minus_inv_tau11, beta );
957 // FLA_Axpy( beta, a21, z21 );
958 // FLA_Scal( inv_tau11, z21 );
961 m_ahead,
962 &beta,
963 a21, rs_A,
964 z21, inc_z );
966 m_ahead,
967 &inv_tau11,
968 z21, inc_z );
969
970 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
973 m_ahead,
974 n_behind,
975 buff_1,
976 A20, rs_A, cs_A,
977 a21, rs_A,
978 buff_0,
979 t01, rs_T );
980
981 // FLA_Copy( first_elem, a21_t );
982 *a21_t = first_elem;
983 }
984
985 if ( m_behind + 1 == b_alg && m_ahead > 0 )
986 {
987 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
990 m_ahead,
991 buff_m1,
992 u21, inc_u,
993 z21, inc_z,
994 A22, rs_A, cs_A );
995 }
996
997 /*------------------------------------------------------------*/
998
999 }
1000
1001 // FLA_Obj_free( &u );
1002 // FLA_Obj_free( &z );
1003 // FLA_Obj_free( &w );
1004 FLA_free( buff_u );
1005 FLA_free( buff_z );
1006 FLA_free( buff_w );
1007
1008 return FLA_SUCCESS;
1009}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_hemv.c:134
void bl1_zher2(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_her2.c:121
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofz_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
643{
648
653 int i;
654
655 // b_alg = FLA_Obj_length( T );
656 int b_alg = m_T;
657
658 // FLA_Set( FLA_ZERO, Z );
659 bl1_zsetm( m_A,
660 b_alg,
661 buff_0,
662 buff_Z, rs_Z, cs_Z );
663
664 for ( i = 0; i < b_alg; ++i )
665 {
666 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
667 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
668 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
669 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
670 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
671
672 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
673 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
674 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
675
676 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
677 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
678
679 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
680
681 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
682 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
683
684 dcomplex* ABL = a10t;
685 dcomplex* ZBL = z10t;
686
688
689 int m_ahead = m_A - i - 1;
690 int m_behind = i;
691 int n_behind = i;
692
693 /*------------------------------------------------------------*/
694
695 if ( m_behind > 0 )
696 {
697 // FLA_Copy( a10t_r, last_elem );
698 // FLA_Set( FLA_ONE, a10t_r );
699 last_elem = *a10t_r;
700 *a10t_r = *buff_1;
701 }
702
703 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
704 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
707 m_ahead + 1,
708 n_behind,
709 buff_m1,
710 ABL, rs_A, cs_A,
711 z10t, cs_Z,
712 buff_1,
713 a2, rs_A );
716 m_ahead + 1,
717 n_behind,
718 buff_m1,
719 ZBL, rs_Z, cs_Z,
720 a10t, cs_A,
721 buff_1,
722 a2, rs_A );
723
724 if ( m_behind > 0 )
725 {
726 // FLA_Copy( last_elem, a10t_r );
727 *a10t_r = last_elem;
728 }
729
730 if ( m_ahead > 0 )
731 {
732 // FLA_Househ2_UT( FLA_LEFT,
733 // a21_t,
734 // a21_b, tau11 );
736 a21_t,
737 a21_b, rs_A,
738 tau11 );
739
740 // FLA_Set( FLA_ONE, inv_tau11 );
741 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
742 // FLA_Copy( inv_tau11, minus_inv_tau11 );
743 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
746
747 // FLA_Copy( a21_t, first_elem );
748 // FLA_Set( FLA_ONE, a21_t );
749 first_elem = *a21_t;
750 *a21_t = *buff_1;
751
752 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
755 m_ahead,
756 buff_1,
757 A22, rs_A, cs_A,
758 a21, rs_A,
759 buff_0,
760 z21, rs_Z );
761
762 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
763 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
764 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
765 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
766 // FLA_Copy( d01, t01 );
768 n_behind,
769 buff_m1,
770 A20, rs_A, cs_A,
771 Z20, rs_Z, cs_Z,
772 t01, rs_T,
773 a21, rs_A,
774 z21, rs_Z );
775
776 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
777 // FLA_Inv_scal( FLA_TWO, beta );
779 m_ahead,
780 a21, rs_A,
781 z21, rs_Z,
782 &beta );
784
785 // FLA_Scal( minus_inv_tau11, beta );
786 // FLA_Axpy( beta, a21, z21 );
787 // FLA_Scal( inv_tau11, z21 );
790 m_ahead,
791 &beta,
792 a21, rs_A,
793 z21, rs_Z );
795 m_ahead,
796 &inv_tau11,
797 z21, rs_Z );
798
799 // FLA_Copy( first_elem, a21_t );
800 *a21_t = first_elem;
801 }
802
803 /*------------------------------------------------------------*/
804
805 }
806
807 return FLA_SUCCESS;
808}
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_opc_var1()

FLA_Error FLA_Tridiag_UT_l_step_opc_var1 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
363{
368
373 int i;
374
375 // b_alg = FLA_Obj_length( T );
376 int b_alg = m_T;
377
378 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
379 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
380 int inc_z = 1;
381
382 for ( i = 0; i < b_alg; ++i )
383 {
384 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
385 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
386 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
387
388 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
389 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
390
391 scomplex* z21 = buff_z + (i+1)*inc_z;
392
393 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
394 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
395
396 int m_ahead = m_A - i - 1;
397 int n_behind = i;
398
399 /*------------------------------------------------------------*/
400
401 if ( m_ahead > 0 )
402 {
403 // FLA_Househ2_UT( FLA_LEFT,
404 // a21_t,
405 // a21_b, tau11 );
407 a21_t,
408 a21_b, rs_A,
409 tau11 );
410
411 // FLA_Set( FLA_ONE, inv_tau11 );
412 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
413 // FLA_Copy( inv_tau11, minus_inv_tau11 );
414 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
417
418 // FLA_Copy( a21_t, first_elem );
419 // FLA_Set( FLA_ONE, a21_t );
420 first_elem = *a21_t;
421 *a21_t = *buff_1;
422
423 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
426 m_ahead,
427 buff_1,
428 A22, rs_A, cs_A,
429 a21, rs_A,
430 buff_0,
431 z21, inc_z );
432
433 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
434 // FLA_Inv_scal( FLA_TWO, beta );
436 m_ahead,
437 a21, rs_A,
438 z21, inc_z,
439 &beta );
441
442 // FLA_Scal( minus_inv_tau11, beta );
443 // FLA_Axpy( beta, a21, z21 );
444 // FLA_Scal( inv_tau11, z21 );
447 m_ahead,
448 &beta,
449 a21, rs_A,
450 z21, inc_z );
452 m_ahead,
453 &inv_tau11,
454 z21, inc_z );
455
456 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
459 m_ahead,
460 buff_m1,
461 a21, rs_A,
462 z21, inc_z,
463 A22, rs_A, cs_A );
464
465 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
468 m_ahead,
469 n_behind,
470 buff_1,
471 A20, rs_A, cs_A,
472 a21, rs_A,
473 buff_0,
474 t01, rs_T );
475
476 // FLA_Copy( first_elem, a21_t );
477 *a21_t = first_elem;
478 }
479
480 /*------------------------------------------------------------*/
481
482 }
483
484 // FLA_Obj_free( &z );
485 FLA_free( buff_z );
486
487 return FLA_SUCCESS;
488}

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_opc_var2()

FLA_Error FLA_Tridiag_UT_l_step_opc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
549{
554
561 int i;
562
563 // b_alg = FLA_Obj_length( T );
564 int b_alg = m_T;
565
566 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
567 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
568 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
569 scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
570 scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
571 scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
572 int inc_u = 1;
573 int inc_z = 1;
574 int inc_w = 1;
575
576 // Initialize some variables (only to prevent compiler warnings).
579
580 for ( i = 0; i < b_alg; ++i )
581 {
582 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
583 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
584 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
585 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
586
587 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
588 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
589
591 scomplex* u21 = buff_u + (i+1)*inc_u;
592
593 scomplex* zeta11 = buff_z + (i )*inc_z;
594 scomplex* z21 = buff_z + (i+1)*inc_z;
595
596 scomplex* w21 = buff_w + (i+1)*inc_w;
597
598 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
599 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
600
601 int m_ahead = m_A - i - 1;
602 int m_behind = i;
603 int n_behind = i;
604
605 /*------------------------------------------------------------*/
606
607 if ( m_behind > 0 )
608 {
609 // FLA_Copy( upsilon11, minus_upsilon11 );
610 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
611 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
614
615 // FLA_Copy( zeta11, minus_zeta11 );
616 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
617 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
620
621 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
622 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
624 1,
626 zeta11, 1,
627 alpha11, 1 );
629 1,
631 upsilon11, 1,
632 alpha11, 1 );
633
634 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
635 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
637 m_ahead,
639 u21, inc_u,
640 a21, rs_A );
642 m_ahead,
644 z21, inc_z,
645 a21, rs_A );
646 }
647
648 if ( m_ahead > 0 )
649 {
650 // FLA_Househ2_UT( FLA_LEFT,
651 // a21_t,
652 // a21_b, tau11 );
654 a21_t,
655 a21_b, rs_A,
656 tau11 );
657
658 // FLA_Set( FLA_ONE, inv_tau11 );
659 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
660 // FLA_Copy( inv_tau11, minus_inv_tau11 );
661 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
664
665 // FLA_Copy( a21_t, first_elem );
666 // FLA_Set( FLA_ONE, a21_t );
667 first_elem = *a21_t;
668 *a21_t = *buff_1;
669 }
670
671 if ( m_behind > 0 )
672 {
673 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
676 m_ahead,
677 buff_m1,
678 u21, inc_u,
679 z21, inc_z,
680 A22, rs_A, cs_A );
681 }
682
683 if ( m_ahead > 0 )
684 {
685 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
688 m_ahead,
689 buff_1,
690 A22, rs_A, cs_A,
691 a21, rs_A,
692 buff_0,
693 w21, inc_w );
694
695 // FLA_Copy( a21, u21 );
696 // FLA_Copy( w21, z21 );
698 m_ahead,
699 a21, rs_A,
700 u21, inc_u );
702 m_ahead,
703 w21, inc_w,
704 z21, inc_z );
705
706 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
707 // FLA_Inv_scal( FLA_TWO, beta );
709 m_ahead,
710 a21, rs_A,
711 z21, inc_z,
712 &beta );
714
715 // FLA_Scal( minus_inv_tau11, beta );
716 // FLA_Axpy( beta, a21, z21 );
717 // FLA_Scal( inv_tau11, z21 );
720 m_ahead,
721 &beta,
722 a21, rs_A,
723 z21, inc_z );
725 m_ahead,
726 &inv_tau11,
727 z21, inc_z );
728
729 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
732 m_ahead,
733 n_behind,
734 buff_1,
735 A20, rs_A, cs_A,
736 a21, rs_A,
737 buff_0,
738 t01, rs_T );
739
740 // FLA_Copy( first_elem, a21_t );
741 *a21_t = first_elem;
742 }
743
744 if ( m_behind + 1 == b_alg && m_ahead > 0 )
745 {
746 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
749 m_ahead,
750 buff_m1,
751 u21, inc_u,
752 z21, inc_z,
753 A22, rs_A, cs_A );
754 }
755
756 /*------------------------------------------------------------*/
757
758 }
759
760 // FLA_Obj_free( &u );
761 // FLA_Obj_free( &z );
762 // FLA_Obj_free( &w );
763 FLA_free( buff_u );
764 FLA_free( buff_z );
765 FLA_free( buff_w );
766
767 return FLA_SUCCESS;
768}

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opc_var3()

FLA_Error FLA_Tridiag_UT_l_step_opc_var3 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
569{
574
579 int i;
580
581 // b_alg = FLA_Obj_length( T );
582 int b_alg = m_T;
583
584 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
585 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
586 scomplex* buff_d = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
587 scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
588 int inc_d = 1;
589 int inc_f = 1;
590
591 // FLA_Set( FLA_ZERO, Z );
592 bl1_csetm( m_A,
593 b_alg,
594 buff_0,
595 buff_Z, rs_Z, cs_Z );
596
597 for ( i = 0; i < b_alg; ++i )
598 {
599 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
600 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
601 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
602 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
603 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
604
605 scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
606 scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
607 scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
608
609 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
610 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
611
612 scomplex* d01 = buff_d + (0 )*inc_d;
613
614 scomplex* f01 = buff_f + (0 )*inc_f;
615
616 scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
617
618 scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
619 scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
620
621 scomplex* ABL = a10t;
622 scomplex* ZBL = z10t;
623
625
626 int m_ahead = m_A - i - 1;
627 int m_behind = i;
628 int n_behind = i;
629
630 /*------------------------------------------------------------*/
631
632 if ( m_behind > 0 )
633 {
634 // FLA_Copy( a10t_r, last_elem );
635 // FLA_Set( FLA_ONE, a10t_r );
636 last_elem = *a10t_r;
637 *a10t_r = *buff_1;
638 }
639
640 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
641 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
644 m_ahead + 1,
645 n_behind,
646 buff_m1,
647 ABL, rs_A, cs_A,
648 z10t, cs_Z,
649 buff_1,
650 a2, rs_A );
653 m_ahead + 1,
654 n_behind,
655 buff_m1,
656 ZBL, rs_Z, cs_Z,
657 a10t, cs_A,
658 buff_1,
659 a2, rs_A );
660
661 if ( m_behind > 0 )
662 {
663 // FLA_Copy( last_elem, a10t_r );
664 *a10t_r = last_elem;
665 }
666
667 if ( m_ahead > 0 )
668 {
669 // FLA_Househ2_UT( FLA_LEFT,
670 // a21_t,
671 // a21_b, tau11 );
673 a21_t,
674 a21_b, rs_A,
675 tau11 );
676
677 // FLA_Set( FLA_ONE, inv_tau11 );
678 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
679 // FLA_Copy( inv_tau11, minus_inv_tau11 );
680 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
683
684 // FLA_Copy( a21_t, first_elem );
685 // FLA_Set( FLA_ONE, a21_t );
686 first_elem = *a21_t;
687 *a21_t = *buff_1;
688
689 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
692 m_ahead,
693 buff_1,
694 A22, rs_A, cs_A,
695 a21, rs_A,
696 buff_0,
697 z21, rs_Z );
698
699 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
700 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
703 m_ahead,
704 n_behind,
705 buff_1,
706 A20, rs_A, cs_A,
707 a21, rs_A,
708 buff_0,
709 d01, inc_d );
712 m_ahead,
713 n_behind,
714 buff_1,
715 Z20, rs_Z, cs_Z,
716 a21, rs_A,
717 buff_0,
718 f01, inc_f );
719
720 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
721 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
724 m_ahead,
725 n_behind,
726 buff_m1,
727 A20, rs_A, cs_A,
728 f01, inc_f,
729 buff_1,
730 z21, rs_Z );
733 m_ahead,
734 n_behind,
735 buff_m1,
736 Z20, rs_Z, cs_Z,
737 d01, inc_d,
738 buff_1,
739 z21, rs_Z );
740
741 // FLA_Copy( d01, t01 );
743 n_behind,
744 d01, inc_d,
745 t01, rs_T );
746
747 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
748 // FLA_Inv_scal( FLA_TWO, beta );
750 m_ahead,
751 a21, rs_A,
752 z21, rs_Z,
753 &beta );
755
756 // FLA_Scal( minus_inv_tau11, beta );
757 // FLA_Axpy( beta, a21, z21 );
758 // FLA_Scal( inv_tau11, z21 );
761 m_ahead,
762 &beta,
763 a21, rs_A,
764 z21, rs_Z );
766 m_ahead,
767 &inv_tau11,
768 z21, rs_Z );
769
770 // FLA_Copy( first_elem, a21_t );
771 *a21_t = first_elem;
772 }
773
774 /*------------------------------------------------------------*/
775
776 }
777
778 // FLA_Obj_free( &d );
779 // FLA_Obj_free( &f );
780 FLA_free( buff_d );
781 FLA_free( buff_f );
782
783 return FLA_SUCCESS;
784}

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_opd_var1()

FLA_Error FLA_Tridiag_UT_l_step_opd_var1 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
232{
233 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
234 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
235 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
237
238 double first_elem;
239 double beta;
240 double inv_tau11;
241 double minus_inv_tau11;
242 int i;
243
244 // b_alg = FLA_Obj_length( T );
245 int b_alg = m_T;
246
247 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
248 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
249 int inc_z = 1;
250
251 for ( i = 0; i < b_alg; ++i )
252 {
253 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
254 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
255 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
256
257 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
258 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
259
260 double* z21 = buff_z + (i+1)*inc_z;
261
262 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
263 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
264
265 int m_ahead = m_A - i - 1;
266 int n_behind = i;
267
268 /*------------------------------------------------------------*/
269
270 if ( m_ahead > 0 )
271 {
272 // FLA_Househ2_UT( FLA_LEFT,
273 // a21_t,
274 // a21_b, tau11 );
276 a21_t,
277 a21_b, rs_A,
278 tau11 );
279
280 // FLA_Set( FLA_ONE, inv_tau11 );
281 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
282 // FLA_Copy( inv_tau11, minus_inv_tau11 );
283 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
286
287 // FLA_Copy( a21_t, first_elem );
288 // FLA_Set( FLA_ONE, a21_t );
289 first_elem = *a21_t;
290 *a21_t = *buff_1;
291
292 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
294 m_ahead,
295 buff_1,
296 A22, rs_A, cs_A,
297 a21, rs_A,
298 buff_0,
299 z21, inc_z );
300
301 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
302 // FLA_Inv_scal( FLA_TWO, beta );
304 m_ahead,
305 a21, rs_A,
306 z21, inc_z,
307 &beta );
309
310 // FLA_Scal( minus_inv_tau11, beta );
311 // FLA_Axpy( beta, a21, z21 );
312 // FLA_Scal( inv_tau11, z21 );
315 m_ahead,
316 &beta,
317 a21, rs_A,
318 z21, inc_z );
320 m_ahead,
321 &inv_tau11,
322 z21, inc_z );
323
324 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
326 m_ahead,
327 buff_m1,
328 a21, rs_A,
329 z21, inc_z,
330 A22, rs_A, cs_A );
331
332 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
335 m_ahead,
336 n_behind,
337 buff_1,
338 A20, rs_A, cs_A,
339 a21, rs_A,
340 buff_0,
341 t01, rs_T );
342
343 // FLA_Copy( first_elem, a21_t );
344 *a21_t = first_elem;
345 }
346
347 /*------------------------------------------------------------*/
348
349 }
350
351 // FLA_Obj_free( &z );
352 FLA_free( buff_z );
353
354 return FLA_SUCCESS;
355}

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_opd_var2()

FLA_Error FLA_Tridiag_UT_l_step_opd_var2 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T 
)
325{
326 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
327 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
328 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
330
331 double first_elem;
332 double beta;
333 double inv_tau11;
334 double minus_inv_tau11;
337 int i;
338
339 // b_alg = FLA_Obj_length( T );
340 int b_alg = m_T;
341
342 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
343 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
344 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
345 double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
346 double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
347 double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
348 int inc_u = 1;
349 int inc_z = 1;
350 int inc_w = 1;
351
352 // Initialize some variables (only to prevent compiler warnings).
355
356 for ( i = 0; i < b_alg; ++i )
357 {
358 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
359 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
360 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
361 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
362
363 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
364 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
365
366 double* upsilon11= buff_u + (i )*inc_u;
367 double* u21 = buff_u + (i+1)*inc_u;
368
369 double* zeta11 = buff_z + (i )*inc_z;
370 double* z21 = buff_z + (i+1)*inc_z;
371
372 double* w21 = buff_w + (i+1)*inc_w;
373
374 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
375 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
376
377 int m_ahead = m_A - i - 1;
378 int m_behind = i;
379 int n_behind = i;
380
381 /*------------------------------------------------------------*/
382
383 if ( m_behind > 0 )
384 {
385 // FLA_Copy( upsilon11, minus_upsilon11 );
386 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
387 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
390
391 // FLA_Copy( zeta11, minus_zeta11 );
392 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
393 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
396
397 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
398 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
400 1,
402 zeta11, 1,
403 alpha11, 1 );
405 1,
407 upsilon11, 1,
408 alpha11, 1 );
409
410 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
411 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
413 m_ahead,
415 u21, inc_u,
416 a21, rs_A );
418 m_ahead,
420 z21, inc_z,
421 a21, rs_A );
422 }
423
424 if ( m_ahead > 0 )
425 {
426 // FLA_Househ2_UT( FLA_LEFT,
427 // a21_t,
428 // a21_b, tau11 );
430 a21_t,
431 a21_b, rs_A,
432 tau11 );
433
434 // FLA_Set( FLA_ONE, inv_tau11 );
435 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
436 // FLA_Copy( inv_tau11, minus_inv_tau11 );
437 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
440
441 // FLA_Copy( a21_t, first_elem );
442 // FLA_Set( FLA_ONE, a21_t );
443 first_elem = *a21_t;
444 *a21_t = *buff_1;
445 }
446
447 if ( m_behind > 0 )
448 {
449 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
451 m_ahead,
452 buff_m1,
453 u21, inc_u,
454 z21, inc_z,
455 A22, rs_A, cs_A );
456 }
457
458 if ( m_ahead > 0 )
459 {
460 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
462 m_ahead,
463 buff_1,
464 A22, rs_A, cs_A,
465 a21, rs_A,
466 buff_0,
467 w21, inc_w );
468
469 // FLA_Copy( a21, u21 );
470 // FLA_Copy( w21, z21 );
472 m_ahead,
473 a21, rs_A,
474 u21, inc_u );
476 m_ahead,
477 w21, inc_w,
478 z21, inc_z );
479
480 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
481 // FLA_Inv_scal( FLA_TWO, beta );
483 m_ahead,
484 a21, rs_A,
485 z21, inc_z,
486 &beta );
488
489 // FLA_Scal( minus_inv_tau11, beta );
490 // FLA_Axpy( beta, a21, z21 );
491 // FLA_Scal( inv_tau11, z21 );
494 m_ahead,
495 &beta,
496 a21, rs_A,
497 z21, inc_z );
499 m_ahead,
500 &inv_tau11,
501 z21, inc_z );
502
503 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
506 m_ahead,
507 n_behind,
508 buff_1,
509 A20, rs_A, cs_A,
510 a21, rs_A,
511 buff_0,
512 t01, rs_T );
513
514 // FLA_Copy( first_elem, a21_t );
515 *a21_t = first_elem;
516 }
517
518 if ( m_behind + 1 == b_alg && m_ahead > 0 )
519 {
520 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
522 m_ahead,
523 buff_m1,
524 u21, inc_u,
525 z21, inc_z,
526 A22, rs_A, cs_A );
527 }
528
529 /*------------------------------------------------------------*/
530
531 }
532
533 // FLA_Obj_free( &u );
534 // FLA_Obj_free( &z );
535 // FLA_Obj_free( &w );
536 FLA_free( buff_u );
537 FLA_free( buff_z );
538 FLA_free( buff_w );
539
540 return FLA_SUCCESS;
541}

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opd_var3()

FLA_Error FLA_Tridiag_UT_l_step_opd_var3 ( int  m_A,
int  m_T,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_T,
int  rs_T,
int  cs_T 
)
346{
347 double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
348 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
349 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
351
352 double first_elem, last_elem;
353 double beta;
354 double inv_tau11;
355 double minus_inv_tau11;
356 int i;
357
358 // b_alg = FLA_Obj_length( T );
359 int b_alg = m_T;
360
361 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
362 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
363 double* buff_d = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
364 double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
365 int inc_d = 1;
366 int inc_f = 1;
367
368 // FLA_Set( FLA_ZERO, Z );
369 bl1_dsetm( m_A,
370 b_alg,
371 buff_0,
372 buff_Z, rs_Z, cs_Z );
373
374 for ( i = 0; i < b_alg; ++i )
375 {
376 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
377 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
378 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
379 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
380 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
381
382 double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
383 double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
384 double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
385
386 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
387 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
388
389 double* d01 = buff_d + (0 )*inc_d;
390
391 double* f01 = buff_f + (0 )*inc_f;
392
393 double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
394
395 double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
396 double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
397
398 double* ABL = a10t;
399 double* ZBL = z10t;
400
401 double* a2 = alpha11;
402
403 int m_ahead = m_A - i - 1;
404 int m_behind = i;
405 int n_behind = i;
406
407 /*------------------------------------------------------------*/
408
409 if ( m_behind > 0 )
410 {
411 // FLA_Copy( a10t_r, last_elem );
412 // FLA_Set( FLA_ONE, a10t_r );
413 last_elem = *a10t_r;
414 *a10t_r = *buff_1;
415 }
416
417 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
418 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
421 m_ahead + 1,
422 n_behind,
423 buff_m1,
424 ABL, rs_A, cs_A,
425 z10t, cs_Z,
426 buff_1,
427 a2, rs_A );
430 m_ahead + 1,
431 n_behind,
432 buff_m1,
433 ZBL, rs_Z, cs_Z,
434 a10t, cs_A,
435 buff_1,
436 a2, rs_A );
437
438 if ( m_behind > 0 )
439 {
440 // FLA_Copy( last_elem, a10t_r );
441 *a10t_r = last_elem;
442 }
443
444 if ( m_ahead > 0 )
445 {
446 // FLA_Househ2_UT( FLA_LEFT,
447 // a21_t,
448 // a21_b, tau11 );
450 a21_t,
451 a21_b, rs_A,
452 tau11 );
453
454 // FLA_Set( FLA_ONE, inv_tau11 );
455 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
456 // FLA_Copy( inv_tau11, minus_inv_tau11 );
457 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
460
461 // FLA_Copy( a21_t, first_elem );
462 // FLA_Set( FLA_ONE, a21_t );
463 first_elem = *a21_t;
464 *a21_t = *buff_1;
465
466 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
468 m_ahead,
469 buff_1,
470 A22, rs_A, cs_A,
471 a21, rs_A,
472 buff_0,
473 z21, rs_Z );
474
475 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
476 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
479 m_ahead,
480 n_behind,
481 buff_1,
482 A20, rs_A, cs_A,
483 a21, rs_A,
484 buff_0,
485 d01, inc_d );
488 m_ahead,
489 n_behind,
490 buff_1,
491 Z20, rs_Z, cs_Z,
492 a21, rs_A,
493 buff_0,
494 f01, inc_f );
495
496 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
497 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
500 m_ahead,
501 n_behind,
502 buff_m1,
503 A20, rs_A, cs_A,
504 f01, inc_f,
505 buff_1,
506 z21, rs_Z );
509 m_ahead,
510 n_behind,
511 buff_m1,
512 Z20, rs_Z, cs_Z,
513 d01, inc_d,
514 buff_1,
515 z21, rs_Z );
516
517 // FLA_Copy( d01, t01 );
519 n_behind,
520 d01, inc_d,
521 t01, rs_T );
522
523 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
524 // FLA_Inv_scal( FLA_TWO, beta );
526 m_ahead,
527 a21, rs_A,
528 z21, rs_Z,
529 &beta );
531
532 // FLA_Scal( minus_inv_tau11, beta );
533 // FLA_Axpy( beta, a21, z21 );
534 // FLA_Scal( inv_tau11, z21 );
537 m_ahead,
538 &beta,
539 a21, rs_A,
540 z21, rs_Z );
542 m_ahead,
543 &inv_tau11,
544 z21, rs_Z );
545
546 // FLA_Copy( first_elem, a21_t );
547 *a21_t = first_elem;
548 }
549
550 /*------------------------------------------------------------*/
551
552 }
553
554 // FLA_Obj_free( &d );
555 // FLA_Obj_free( &f );
556 FLA_free( buff_d );
557 FLA_free( buff_f );
558
559 return FLA_SUCCESS;
560}

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsetm(), bl1_dsymv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_ops_var1()

FLA_Error FLA_Tridiag_UT_l_step_ops_var1 ( int  m_A,
int  m_T,
float buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float beta;
109 float inv_tau11;
110 float minus_inv_tau11;
111 int i;
112
113 // b_alg = FLA_Obj_length( T );
114 int b_alg = m_T;
115
116 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
117 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
118 int inc_z = 1;
119
120 for ( i = 0; i < b_alg; ++i )
121 {
122 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
123 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
124 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
125
126 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
127 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
128
129 float* z21 = buff_z + (i+1)*inc_z;
130
131 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
132 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
133
134 int m_ahead = m_A - i - 1;
135 int n_behind = i;
136
137 /*------------------------------------------------------------*/
138
139 if ( m_ahead > 0 )
140 {
141 // FLA_Househ2_UT( FLA_LEFT,
142 // a21_t,
143 // a21_b, tau11 );
145 a21_t,
146 a21_b, rs_A,
147 tau11 );
148
149 // FLA_Set( FLA_ONE, inv_tau11 );
150 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
151 // FLA_Copy( inv_tau11, minus_inv_tau11 );
152 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
155
156 // FLA_Copy( a21_t, first_elem );
157 // FLA_Set( FLA_ONE, a21_t );
158 first_elem = *a21_t;
159 *a21_t = *buff_1;
160
161 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
163 m_ahead,
164 buff_1,
165 A22, rs_A, cs_A,
166 a21, rs_A,
167 buff_0,
168 z21, inc_z );
169
170 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
171 // FLA_Inv_scal( FLA_TWO, beta );
173 m_ahead,
174 a21, rs_A,
175 z21, inc_z,
176 &beta );
178
179 // FLA_Scal( minus_inv_tau11, beta );
180 // FLA_Axpy( beta, a21, z21 );
181 // FLA_Scal( inv_tau11, z21 );
184 m_ahead,
185 &beta,
186 a21, rs_A,
187 z21, inc_z );
189 m_ahead,
190 &inv_tau11,
191 z21, inc_z );
192
193 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
195 m_ahead,
196 buff_m1,
197 a21, rs_A,
198 z21, inc_z,
199 A22, rs_A, cs_A );
200
201 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
204 m_ahead,
205 n_behind,
206 buff_1,
207 A20, rs_A, cs_A,
208 a21, rs_A,
209 buff_0,
210 t01, rs_T );
211
212 // FLA_Copy( first_elem, a21_t );
213 *a21_t = first_elem;
214 }
215
216 /*------------------------------------------------------------*/
217
218 }
219
220 // FLA_Obj_free( &z );
221 FLA_free( buff_z );
222
223 return FLA_SUCCESS;
224}

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_ops_var2()

FLA_Error FLA_Tridiag_UT_l_step_ops_var2 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T 
)
101{
102 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
106
107 float first_elem;
108 float beta;
109 float inv_tau11;
110 float minus_inv_tau11;
113 int i;
114
115 // b_alg = FLA_Obj_length( T );
116 int b_alg = m_T;
117
118 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
119 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
120 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
121 float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
122 float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
123 float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
124 int inc_u = 1;
125 int inc_z = 1;
126 int inc_w = 1;
127
128 // Initialize some variables (only to prevent compiler warnings).
131
132 for ( i = 0; i < b_alg; ++i )
133 {
134 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
135 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
136 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
137 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138
139 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
140 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
141
142 float* upsilon11= buff_u + (i )*inc_u;
143 float* u21 = buff_u + (i+1)*inc_u;
144
145 float* zeta11 = buff_z + (i )*inc_z;
146 float* z21 = buff_z + (i+1)*inc_z;
147
148 float* w21 = buff_w + (i+1)*inc_w;
149
150 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
151 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
152
153 int m_ahead = m_A - i - 1;
154 int m_behind = i;
155 int n_behind = i;
156
157 /*------------------------------------------------------------*/
158
159 if ( m_behind > 0 )
160 {
161 // FLA_Copy( upsilon11, minus_upsilon11 );
162 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
163 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
166
167 // FLA_Copy( zeta11, minus_zeta11 );
168 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
169 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
172
173 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
174 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
176 1,
178 zeta11, 1,
179 alpha11, 1 );
181 1,
183 upsilon11, 1,
184 alpha11, 1 );
185
186 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
187 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
189 m_ahead,
191 u21, inc_u,
192 a21, rs_A );
194 m_ahead,
196 z21, inc_z,
197 a21, rs_A );
198 }
199
200 if ( m_ahead > 0 )
201 {
202 // FLA_Househ2_UT( FLA_LEFT,
203 // a21_t,
204 // a21_b, tau11 );
206 a21_t,
207 a21_b, rs_A,
208 tau11 );
209
210 // FLA_Set( FLA_ONE, inv_tau11 );
211 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
212 // FLA_Copy( inv_tau11, minus_inv_tau11 );
213 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
216
217 // FLA_Copy( a21_t, first_elem );
218 // FLA_Set( FLA_ONE, a21_t );
219 first_elem = *a21_t;
220 *a21_t = *buff_1;
221 }
222
223 if ( m_behind > 0 )
224 {
225 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
227 m_ahead,
228 buff_m1,
229 u21, inc_u,
230 z21, inc_z,
231 A22, rs_A, cs_A );
232 }
233
234 if ( m_ahead > 0 )
235 {
236 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
238 m_ahead,
239 buff_1,
240 A22, rs_A, cs_A,
241 a21, rs_A,
242 buff_0,
243 w21, inc_w );
244
245 // FLA_Copy( a21, u21 );
246 // FLA_Copy( w21, z21 );
248 m_ahead,
249 a21, rs_A,
250 u21, inc_u );
252 m_ahead,
253 w21, inc_w,
254 z21, inc_z );
255
256 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
257 // FLA_Inv_scal( FLA_TWO, beta );
259 m_ahead,
260 a21, rs_A,
261 z21, inc_z,
262 &beta );
264
265 // FLA_Scal( minus_inv_tau11, beta );
266 // FLA_Axpy( beta, a21, z21 );
267 // FLA_Scal( inv_tau11, z21 );
270 m_ahead,
271 &beta,
272 a21, rs_A,
273 z21, inc_z );
275 m_ahead,
276 &inv_tau11,
277 z21, inc_z );
278
279 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
282 m_ahead,
283 n_behind,
284 buff_1,
285 A20, rs_A, cs_A,
286 a21, rs_A,
287 buff_0,
288 t01, rs_T );
289
290 // FLA_Copy( first_elem, a21_t );
291 *a21_t = first_elem;
292 }
293
294 if ( m_behind + 1 == b_alg && m_ahead > 0 )
295 {
296 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
298 m_ahead,
299 buff_m1,
300 u21, inc_u,
301 z21, inc_z,
302 A22, rs_A, cs_A );
303 }
304
305 /*------------------------------------------------------------*/
306
307 }
308
309 // FLA_Obj_free( &u );
310 // FLA_Obj_free( &z );
311 // FLA_Obj_free( &w );
312 FLA_free( buff_u );
313 FLA_free( buff_z );
314 FLA_free( buff_w );
315
316 return FLA_SUCCESS;
317}

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_ops_var3()

FLA_Error FLA_Tridiag_UT_l_step_ops_var3 ( int  m_A,
int  m_T,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_T,
int  rs_T,
int  cs_T 
)
123{
124 float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
125 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
126 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
128
129 float first_elem, last_elem;
130 float beta;
131 float inv_tau11;
132 float minus_inv_tau11;
133 int i;
134
135 // b_alg = FLA_Obj_length( T );
136 int b_alg = m_T;
137
138 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
139 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
140 float* buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
141 float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
142 int inc_d = 1;
143 int inc_f = 1;
144
145 // FLA_Set( FLA_ZERO, Z );
146 bl1_ssetm( m_A,
147 b_alg,
148 buff_0,
149 buff_Z, rs_Z, cs_Z );
150
151 for ( i = 0; i < b_alg; ++i )
152 {
153 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
154 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
155 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
156 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
157 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
158
159 float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
160 float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
161 float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
162
163 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
164 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
165
166 float* d01 = buff_d + (0 )*inc_d;
167
168 float* f01 = buff_f + (0 )*inc_f;
169
170 float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
171
172 float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
173 float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
174
175 float* ABL = a10t;
176 float* ZBL = z10t;
177
178 float* a2 = alpha11;
179
180 int m_ahead = m_A - i - 1;
181 int m_behind = i;
182 int n_behind = i;
183
184 /*------------------------------------------------------------*/
185
186 if ( m_behind > 0 )
187 {
188 // FLA_Copy( a10t_r, last_elem );
189 // FLA_Set( FLA_ONE, a10t_r );
190 last_elem = *a10t_r;
191 *a10t_r = *buff_1;
192 }
193
194 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
195 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
198 m_ahead + 1,
199 n_behind,
200 buff_m1,
201 ABL, rs_A, cs_A,
202 z10t, cs_Z,
203 buff_1,
204 a2, rs_A );
207 m_ahead + 1,
208 n_behind,
209 buff_m1,
210 ZBL, rs_Z, cs_Z,
211 a10t, cs_A,
212 buff_1,
213 a2, rs_A );
214
215 if ( m_behind > 0 )
216 {
217 // FLA_Copy( last_elem, a10t_r );
218 *a10t_r = last_elem;
219 }
220
221 if ( m_ahead > 0 )
222 {
223 // FLA_Househ2_UT( FLA_LEFT,
224 // a21_t,
225 // a21_b, tau11 );
227 a21_t,
228 a21_b, rs_A,
229 tau11 );
230
231 // FLA_Set( FLA_ONE, inv_tau11 );
232 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
233 // FLA_Copy( inv_tau11, minus_inv_tau11 );
234 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
237
238 // FLA_Copy( a21_t, first_elem );
239 // FLA_Set( FLA_ONE, a21_t );
240 first_elem = *a21_t;
241 *a21_t = *buff_1;
242
243 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
245 m_ahead,
246 buff_1,
247 A22, rs_A, cs_A,
248 a21, rs_A,
249 buff_0,
250 z21, rs_Z );
251
252 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
253 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
256 m_ahead,
257 n_behind,
258 buff_1,
259 A20, rs_A, cs_A,
260 a21, rs_A,
261 buff_0,
262 d01, inc_d );
265 m_ahead,
266 n_behind,
267 buff_1,
268 Z20, rs_Z, cs_Z,
269 a21, rs_A,
270 buff_0,
271 f01, inc_f );
272
273 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
274 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
277 m_ahead,
278 n_behind,
279 buff_m1,
280 A20, rs_A, cs_A,
281 f01, inc_f,
282 buff_1,
283 z21, rs_Z );
286 m_ahead,
287 n_behind,
288 buff_m1,
289 Z20, rs_Z, cs_Z,
290 d01, inc_d,
291 buff_1,
292 z21, rs_Z );
293
294 // FLA_Copy( d01, t01 );
296 n_behind,
297 d01, inc_d,
298 t01, rs_T );
299
300 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
301 // FLA_Inv_scal( FLA_TWO, beta );
303 m_ahead,
304 a21, rs_A,
305 z21, rs_Z,
306 &beta );
308
309 // FLA_Scal( minus_inv_tau11, beta );
310 // FLA_Axpy( beta, a21, z21 );
311 // FLA_Scal( inv_tau11, z21 );
314 m_ahead,
315 &beta,
316 a21, rs_A,
317 z21, rs_Z );
319 m_ahead,
320 &inv_tau11,
321 z21, rs_Z );
322
323 // FLA_Copy( first_elem, a21_t );
324 *a21_t = first_elem;
325 }
326
327 /*------------------------------------------------------------*/
328
329 }
330
331 // FLA_Obj_free( &d );
332 // FLA_Obj_free( &f );
333 FLA_free( buff_d );
334 FLA_free( buff_f );
335
336 return FLA_SUCCESS;
337}

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssetm(), bl1_ssymv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_opt_var1()

FLA_Error FLA_Tridiag_UT_l_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Tridiag_UT_l_step_ops_var1(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var1.c:97
FLA_Error FLA_Tridiag_UT_l_step_opd_var1(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var1.c:228
FLA_Error FLA_Tridiag_UT_l_step_opc_var1(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var1.c:359
FLA_Error FLA_Tridiag_UT_l_step_opz_var1(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var1.c:492

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_opz_var1(), and i.

Referenced by FLA_Tridiag_UT_l_blk_var1(), and FLA_Tridiag_UT_l_opt_var1().

◆ FLA_Tridiag_UT_l_step_opt_var2()

FLA_Error FLA_Tridiag_UT_l_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Datatype datatype;
21 int m_A, m_T;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24
25 datatype = FLA_Obj_datatype( A );
26
27 m_A = FLA_Obj_length( A );
28 m_T = FLA_Obj_length( T );
29
32
35
36
37 switch ( datatype )
38 {
39 case FLA_FLOAT:
40 {
41 float* buff_A = FLA_FLOAT_PTR( A );
42 float* buff_T = FLA_FLOAT_PTR( T );
43
45 m_T,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_A = FLA_DOUBLE_PTR( A );
55 double* buff_T = FLA_DOUBLE_PTR( T );
56
58 m_T,
60 buff_T, rs_T, cs_T );
61
62 break;
63 }
64
65 case FLA_COMPLEX:
66 {
69
71 m_T,
73 buff_T, rs_T, cs_T );
74
75 break;
76 }
77
79 {
82
84 m_T,
86 buff_T, rs_T, cs_T );
87
88 break;
89 }
90 }
91
92 return FLA_SUCCESS;
93}
FLA_Error FLA_Tridiag_UT_l_step_opc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:545
FLA_Error FLA_Tridiag_UT_l_step_opd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:321
FLA_Error FLA_Tridiag_UT_l_step_ops_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:97
FLA_Error FLA_Tridiag_UT_l_step_opz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var2.c:772

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_opz_var2(), and i.

Referenced by FLA_Tridiag_UT_l_blk_var2(), and FLA_Tridiag_UT_l_opt_var2().

◆ FLA_Tridiag_UT_l_step_opt_var3()

FLA_Error FLA_Tridiag_UT_l_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
28{
29 FLA_Datatype datatype;
30 int m_A, m_T;
31 int rs_A, cs_A;
32 int rs_Z, cs_Z;
33 int rs_T, cs_T;
34
35 datatype = FLA_Obj_datatype( A );
36
37 m_A = FLA_Obj_length( A );
38 m_T = FLA_Obj_length( T );
39
42
45
48
49
50 switch ( datatype )
51 {
52 case FLA_FLOAT:
53 {
54 float* buff_A = FLA_FLOAT_PTR( A );
55 float* buff_Z = FLA_FLOAT_PTR( Z );
56 float* buff_T = FLA_FLOAT_PTR( T );
57
59 m_T,
62 buff_T, rs_T, cs_T );
63
64 break;
65 }
66
67 case FLA_DOUBLE:
68 {
69 double* buff_A = FLA_DOUBLE_PTR( A );
70 double* buff_Z = FLA_DOUBLE_PTR( Z );
71 double* buff_T = FLA_DOUBLE_PTR( T );
72
74 m_T,
77 buff_T, rs_T, cs_T );
78
79 break;
80 }
81
82 case FLA_COMPLEX:
83 {
87
89 m_T,
92 buff_T, rs_T, cs_T );
93
94 break;
95 }
96
98 {
102
104 m_T,
105 buff_A, rs_A, cs_A,
106 buff_Z, rs_Z, cs_Z,
107 buff_T, rs_T, cs_T );
108
109 break;
110 }
111 }
112
113 return FLA_SUCCESS;
114}
FLA_Error FLA_Tridiag_UT_l_step_opd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:341
FLA_Error FLA_Tridiag_UT_l_step_opc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:564
FLA_Error FLA_Tridiag_UT_l_step_ops_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:118
FLA_Error FLA_Tridiag_UT_l_step_opz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_Tridiag_UT_l_opt_var3.c:788

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Tridiag_UT_l_step_ops_var3(), FLA_Tridiag_UT_l_step_opz_var3(), and i.

Referenced by FLA_Tridiag_UT_l_blk_var3(), and FLA_Tridiag_UT_l_opt_var3().

◆ FLA_Tridiag_UT_l_step_opz_var1()

FLA_Error FLA_Tridiag_UT_l_step_opz_var1 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
496{
501
506 int i;
507
508 // b_alg = FLA_Obj_length( T );
509 int b_alg = m_T;
510
511 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
512 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
513 int inc_z = 1;
514
515 for ( i = 0; i < b_alg; ++i )
516 {
517 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
518 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
519 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
520
521 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
522 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
523
524 dcomplex* z21 = buff_z + (i+1)*inc_z;
525
526 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
527 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
528
529 int m_ahead = m_A - i - 1;
530 int n_behind = i;
531
532 /*------------------------------------------------------------*/
533
534 if ( m_ahead > 0 )
535 {
536 // FLA_Househ2_UT( FLA_LEFT,
537 // a21_t,
538 // a21_b, tau11 );
540 a21_t,
541 a21_b, rs_A,
542 tau11 );
543
544 // FLA_Set( FLA_ONE, inv_tau11 );
545 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
546 // FLA_Copy( inv_tau11, minus_inv_tau11 );
547 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
550
551 // FLA_Copy( a21_t, first_elem );
552 // FLA_Set( FLA_ONE, a21_t );
553 first_elem = *a21_t;
554 *a21_t = *buff_1;
555
556 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
559 m_ahead,
560 buff_1,
561 A22, rs_A, cs_A,
562 a21, rs_A,
563 buff_0,
564 z21, inc_z );
565
566 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
567 // FLA_Inv_scal( FLA_TWO, beta );
569 m_ahead,
570 a21, rs_A,
571 z21, inc_z,
572 &beta );
574
575 // FLA_Scal( minus_inv_tau11, beta );
576 // FLA_Axpy( beta, a21, z21 );
577 // FLA_Scal( inv_tau11, z21 );
580 m_ahead,
581 &beta,
582 a21, rs_A,
583 z21, inc_z );
585 m_ahead,
586 &inv_tau11,
587 z21, inc_z );
588
589 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
592 m_ahead,
593 buff_m1,
594 a21, rs_A,
595 z21, inc_z,
596 A22, rs_A, cs_A );
597
598 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
601 m_ahead,
602 n_behind,
603 buff_1,
604 A20, rs_A, cs_A,
605 a21, rs_A,
606 buff_0,
607 t01, rs_T );
608
609 // FLA_Copy( first_elem, a21_t );
610 *a21_t = first_elem;
611 }
612
613 /*------------------------------------------------------------*/
614
615 }
616
617 // FLA_Obj_free( &z );
618 FLA_free( buff_z );
619
620 return FLA_SUCCESS;
621}

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_opz_var2()

FLA_Error FLA_Tridiag_UT_l_step_opz_var2 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
776{
781
788 int i;
789
790 // b_alg = FLA_Obj_length( T );
791 int b_alg = m_T;
792
793 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
794 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
795 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
796 dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
797 dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
798 dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
799 int inc_u = 1;
800 int inc_z = 1;
801 int inc_w = 1;
802
803 // Initialize some variables (only to prevent compiler warnings).
806
807 for ( i = 0; i < b_alg; ++i )
808 {
809 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
810 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
811 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
812 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
813
814 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
815 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
816
818 dcomplex* u21 = buff_u + (i+1)*inc_u;
819
820 dcomplex* zeta11 = buff_z + (i )*inc_z;
821 dcomplex* z21 = buff_z + (i+1)*inc_z;
822
823 dcomplex* w21 = buff_w + (i+1)*inc_w;
824
825 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
826 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
827
828 int m_ahead = m_A - i - 1;
829 int m_behind = i;
830 int n_behind = i;
831
832 /*------------------------------------------------------------*/
833
834 if ( m_behind > 0 )
835 {
836 // FLA_Copy( upsilon11, minus_upsilon11 );
837 // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
838 // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
841
842 // FLA_Copy( zeta11, minus_zeta11 );
843 // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
844 // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
847
848 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
849 // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
851 1,
853 zeta11, 1,
854 alpha11, 1 );
856 1,
858 upsilon11, 1,
859 alpha11, 1 );
860
861 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
862 // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
864 m_ahead,
866 u21, inc_u,
867 a21, rs_A );
869 m_ahead,
871 z21, inc_z,
872 a21, rs_A );
873 }
874
875 if ( m_ahead > 0 )
876 {
877 // FLA_Househ2_UT( FLA_LEFT,
878 // a21_t,
879 // a21_b, tau11 );
881 a21_t,
882 a21_b, rs_A,
883 tau11 );
884
885 // FLA_Set( FLA_ONE, inv_tau11 );
886 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
887 // FLA_Copy( inv_tau11, minus_inv_tau11 );
888 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
891
892 // FLA_Copy( a21_t, first_elem );
893 // FLA_Set( FLA_ONE, a21_t );
894 first_elem = *a21_t;
895 *a21_t = *buff_1;
896 }
897
898 if ( m_behind > 0 )
899 {
900 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
903 m_ahead,
904 buff_m1,
905 u21, inc_u,
906 z21, inc_z,
907 A22, rs_A, cs_A );
908 }
909
910 if ( m_ahead > 0 )
911 {
912 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
915 m_ahead,
916 buff_1,
917 A22, rs_A, cs_A,
918 a21, rs_A,
919 buff_0,
920 w21, inc_w );
921
922 // FLA_Copy( a21, u21 );
923 // FLA_Copy( w21, z21 );
925 m_ahead,
926 a21, rs_A,
927 u21, inc_u );
929 m_ahead,
930 w21, inc_w,
931 z21, inc_z );
932
933 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
934 // FLA_Inv_scal( FLA_TWO, beta );
936 m_ahead,
937 a21, rs_A,
938 z21, inc_z,
939 &beta );
941
942 // FLA_Scal( minus_inv_tau11, beta );
943 // FLA_Axpy( beta, a21, z21 );
944 // FLA_Scal( inv_tau11, z21 );
947 m_ahead,
948 &beta,
949 a21, rs_A,
950 z21, inc_z );
952 m_ahead,
953 &inv_tau11,
954 z21, inc_z );
955
956 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
959 m_ahead,
960 n_behind,
961 buff_1,
962 A20, rs_A, cs_A,
963 a21, rs_A,
964 buff_0,
965 t01, rs_T );
966
967 // FLA_Copy( first_elem, a21_t );
968 *a21_t = first_elem;
969 }
970
971 if ( m_behind + 1 == b_alg && m_ahead > 0 )
972 {
973 // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
976 m_ahead,
977 buff_m1,
978 u21, inc_u,
979 z21, inc_z,
980 A22, rs_A, cs_A );
981 }
982
983 /*------------------------------------------------------------*/
984
985 }
986
987 // FLA_Obj_free( &u );
988 // FLA_Obj_free( &z );
989 // FLA_Obj_free( &w );
990 FLA_free( buff_u );
991 FLA_free( buff_z );
992 FLA_free( buff_w );
993
994 return FLA_SUCCESS;
995}

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opz_var3()

FLA_Error FLA_Tridiag_UT_l_step_opz_var3 ( int  m_A,
int  m_T,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_T,
int  rs_T,
int  cs_T 
)
793{
798
803 int i;
804
805 // b_alg = FLA_Obj_length( T );
806 int b_alg = m_T;
807
808 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
809 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
810 dcomplex* buff_d = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
811 dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
812 int inc_d = 1;
813 int inc_f = 1;
814
815 // FLA_Set( FLA_ZERO, Z );
816 bl1_zsetm( m_A,
817 b_alg,
818 buff_0,
819 buff_Z, rs_Z, cs_Z );
820
821 for ( i = 0; i < b_alg; ++i )
822 {
823 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
824 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
825 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
826 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
827 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
828
829 dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
830 dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
831 dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
832
833 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
834 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
835
836 dcomplex* d01 = buff_d + (0 )*inc_d;
837
838 dcomplex* f01 = buff_f + (0 )*inc_f;
839
840 dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
841
842 dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
843 dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
844
845 dcomplex* ABL = a10t;
846 dcomplex* ZBL = z10t;
847
849
850 int m_ahead = m_A - i - 1;
851 int m_behind = i;
852 int n_behind = i;
853
854 /*------------------------------------------------------------*/
855
856 if ( m_behind > 0 )
857 {
858 // FLA_Copy( a10t_r, last_elem );
859 // FLA_Set( FLA_ONE, a10t_r );
860 last_elem = *a10t_r;
861 *a10t_r = *buff_1;
862 }
863
864 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
865 // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
868 m_ahead + 1,
869 n_behind,
870 buff_m1,
871 ABL, rs_A, cs_A,
872 z10t, cs_Z,
873 buff_1,
874 a2, rs_A );
877 m_ahead + 1,
878 n_behind,
879 buff_m1,
880 ZBL, rs_Z, cs_Z,
881 a10t, cs_A,
882 buff_1,
883 a2, rs_A );
884
885 if ( m_behind > 0 )
886 {
887 // FLA_Copy( last_elem, a10t_r );
888 *a10t_r = last_elem;
889 }
890
891 if ( m_ahead > 0 )
892 {
893 // FLA_Househ2_UT( FLA_LEFT,
894 // a21_t,
895 // a21_b, tau11 );
897 a21_t,
898 a21_b, rs_A,
899 tau11 );
900
901 // FLA_Set( FLA_ONE, inv_tau11 );
902 // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
903 // FLA_Copy( inv_tau11, minus_inv_tau11 );
904 // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
907
908 // FLA_Copy( a21_t, first_elem );
909 // FLA_Set( FLA_ONE, a21_t );
910 first_elem = *a21_t;
911 *a21_t = *buff_1;
912
913 // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
916 m_ahead,
917 buff_1,
918 A22, rs_A, cs_A,
919 a21, rs_A,
920 buff_0,
921 z21, rs_Z );
922
923 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
924 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
927 m_ahead,
928 n_behind,
929 buff_1,
930 A20, rs_A, cs_A,
931 a21, rs_A,
932 buff_0,
933 d01, inc_d );
936 m_ahead,
937 n_behind,
938 buff_1,
939 Z20, rs_Z, cs_Z,
940 a21, rs_A,
941 buff_0,
942 f01, inc_f );
943
944 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
945 // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
948 m_ahead,
949 n_behind,
950 buff_m1,
951 A20, rs_A, cs_A,
952 f01, inc_f,
953 buff_1,
954 z21, rs_Z );
957 m_ahead,
958 n_behind,
959 buff_m1,
960 Z20, rs_Z, cs_Z,
961 d01, inc_d,
962 buff_1,
963 z21, rs_Z );
964
965 // FLA_Copy( d01, t01 );
967 n_behind,
968 d01, inc_d,
969 t01, rs_T );
970
971 // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
972 // FLA_Inv_scal( FLA_TWO, beta );
974 m_ahead,
975 a21, rs_A,
976 z21, rs_Z,
977 &beta );
979
980 // FLA_Scal( minus_inv_tau11, beta );
981 // FLA_Axpy( beta, a21, z21 );
982 // FLA_Scal( inv_tau11, z21 );
985 m_ahead,
986 &beta,
987 a21, rs_A,
988 z21, rs_Z );
990 m_ahead,
991 &inv_tau11,
992 z21, rs_Z );
993
994 // FLA_Copy( first_elem, a21_t );
995 *a21_t = first_elem;
996 }
997
998 /*------------------------------------------------------------*/
999
1000 }
1001
1002 // FLA_Obj_free( &d );
1003 // FLA_Obj_free( &f );
1004 FLA_free( buff_d );
1005 FLA_free( buff_f );
1006
1007 return FLA_SUCCESS;
1008}

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_unb_var1()

FLA_Error FLA_Tridiag_UT_l_step_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj TTL, TTR, T00, t01, T02,
24 TBL, TBR, t10t, tau11, t12t,
25 T20, t21, T22;
26 FLA_Obj zT, z01,
27 zB, zeta11,
28 z21;
29 FLA_Obj z;
30
35
37 a21_b;
38
40 dim_t m_A;
42
43
45
47 m_A = FLA_Obj_length( A );
48
49 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
51 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
52 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
53 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
54
55 FLA_Part_2x2( A, &ATL, &ATR,
56 &ABL, &ABR, 0, 0, FLA_TL );
57 FLA_Part_2x2( T, &TTL, &TTR,
58 &TBL, &TBR, 0, 0, FLA_TL );
59 FLA_Part_2x1( z, &zT,
60 &zB, 0, FLA_TOP );
61
62 while ( FLA_Obj_length( ATL ) < b_alg )
63 {
64 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
65 /* ************* */ /* ************************** */
66 &a10t, /**/ &alpha11, &a12t,
67 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
68 1, 1, FLA_BR );
69 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
70 /* ************* */ /* ************************ */
71 &t10t, /**/ &tau11, &t12t,
72 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
73 1, 1, FLA_BR );
75 /* ** */ /* ****** */
76 &zeta11,
77 zB, &z21, 1, FLA_BOTTOM );
78
79 /*------------------------------------------------------------*/
80
81 if ( FLA_Obj_length( A22 ) > 0 )
82 {
84 &a21_b, 1, FLA_TOP );
85
86 // [ u21, tau11, a21 ] = House( a21 );
88 a21_t,
89 a21_b, tau11 );
90
91 // inv_tau11 = 1 / tau11;
92 // minus_inv_tau11 = -1 / tau11;
97
98 // Save first element of a21_t and set it to one so we can use a21 as
99 // u21 in subsequent computations. We will restore a21_t later on.
102
103 // z21 = A22 * u21;
105
106 // beta = u21' * z21 / 2;
109
110 // z21 = ( z21 - beta / tau * u21 ) / tau;
112 FLA_Axpy( beta, a21, z21 );
114
115 // A22 = A22 - u21 * z21' - z21 * u21';
117
118 // t01 = U20' * u21;
120
121 // Restore first element of a21.
123 }
124
125 /*------------------------------------------------------------*/
126
127 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
128 a10t, alpha11, /**/ a12t,
129 /* ************** */ /* ************************ */
130 &ABL, /**/ &ABR, A20, a21, /**/ A22,
131 FLA_TL );
132 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
133 t10t, tau11, /**/ t12t,
134 /* ************** */ /* ********************** */
135 &TBL, /**/ &TBR, T20, t21, /**/ T22,
136 FLA_TL );
138 zeta11,
139 /* ** */ /* ****** */
140 &zB, z21, FLA_TOP );
141 }
142
146 FLA_Obj_free( &beta );
147 FLA_Obj_free( &z );
148
149 return FLA_SUCCESS;
150}
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy.c:15
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Scal.c:15
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition FLA_Dotc.c:13
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Axpy.c:15
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Inv_scal.c:13
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition FLA_Inv_scalc.c:13
FLA_Error FLA_Her2(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition FLA_Her2.c:13
FLA_Error FLA_Hemv(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Hemv.c:13
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemv.c:15
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition FLA_Househ2_UT.c:16

References FLA_Axpy(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Dotc(), FLA_Gemv(), FLA_Hemv(), FLA_Her2(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_unb_var1().

◆ FLA_Tridiag_UT_l_step_unb_var2()

FLA_Error FLA_Tridiag_UT_l_step_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19{
20 FLA_Obj ATL, ATR, A00, a01, A02,
22 A20, a21, A22;
23 FLA_Obj TTL, TTR, T00, t01, T02,
24 TBL, TBR, t10t, tau11, t12t,
25 T20, t21, T22;
26 FLA_Obj uT, u01,
28 u21;
29 FLA_Obj zT, z01,
30 zB, zeta11,
31 z21;
32 FLA_Obj wT, w01,
33 wB, omega11,
34 w21;
35 FLA_Obj u, z, w;
36
45
47 a21_b;
48
50 dim_t m_A;
52
53
55
57 m_A = FLA_Obj_length( A );
58
59 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
61 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
62 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
65 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
67 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
68 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
69 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
70
71 FLA_Part_2x2( A, &ATL, &ATR,
72 &ABL, &ABR, 0, 0, FLA_TL );
73 FLA_Part_2x2( T, &TTL, &TTR,
74 &TBL, &TBR, 0, 0, FLA_TL );
75 FLA_Part_2x1( u, &uT,
76 &uB, 0, FLA_TOP );
77 FLA_Part_2x1( z, &zT,
78 &zB, 0, FLA_TOP );
79 FLA_Part_2x1( w, &wT,
80 &wB, 0, FLA_TOP );
81
82 while ( FLA_Obj_length( ATL ) < b_alg )
83 {
84 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
85 /* ************* */ /* ************************** */
86 &a10t, /**/ &alpha11, &a12t,
87 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
88 1, 1, FLA_BR );
89 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
90 /* ************* */ /* ************************ */
91 &t10t, /**/ &tau11, &t12t,
92 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
93 1, 1, FLA_BR );
95 /* ** */ /* ********* */
96 &upsilon11,
97 uB, &u21, 1, FLA_BOTTOM );
99 /* ** */ /* ****** */
100 &zeta11,
101 zB, &z21, 1, FLA_BOTTOM );
103 /* ** */ /* ******* */
104 &omega11,
105 wB, &w21, 1, FLA_BOTTOM );
106
107 /*------------------------------------------------------------*/
108
109 if ( FLA_Obj_length( ATL ) > 0 )
110 {
114
118
119 // alpha11 = alpha11 - upsilon11 * conj(zeta11) - zeta11 * conj(upsilon11);
122
123 // a21 = a21 - conj(zeta11) * u21 - conj(upsilon11) * z21;
126 }
127
128 if ( FLA_Obj_length( A22 ) > 0 )
129 {
131 &a21_b, 1, FLA_TOP );
132
133 // [ x21, tau11, a21 ] = House( a21 );
135 a21_t,
136 a21_b, tau11 );
137
138 // inv_tau11 = 1 / tau11;
139 // minus_inv_tau11 = -1 / tau11;
144
145 // Save first element of a21_t and set it to one so we can use a21 as
146 // u21 in subsequent computations. We will restore a21_t later on.
149 }
150
151 if ( FLA_Obj_length( ATL ) > 0 )
152 {
153 // A22 = A22 - u21 * z21' - z21 * u21';
155 }
156
157 if ( FLA_Obj_length( A22 ) > 0 )
158 {
159 // w21 = A22 * x21;
161
162 // u21 = x21;
163 // z21 = w21;
164 FLA_Copy( a21, u21 );
165 FLA_Copy( w21, z21 );
166
167 // beta = u21' * z21 / 2;
170
171 // z21 = ( z21 - beta / tau * u21 ) / tau;
173 FLA_Axpy( beta, a21, z21 );
175
176 // t01 = U20' * u21;
178
179 // Restore first element of a21.
181 }
182
183 // Update A22 if this is the last iteration; this is needed when we're
184 // being called from the blocked routine so A22 is left in a valid state.
185 if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
186 FLA_Obj_length( A22 ) > 0 )
187 {
188 // A22 = A22 - u21 * z21' - z21 * u21';
190 }
191
192 /*------------------------------------------------------------*/
193
194 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
195 a10t, alpha11, /**/ a12t,
196 /* ************** */ /* ************************ */
197 &ABL, /**/ &ABR, A20, a21, /**/ A22,
198 FLA_TL );
199 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
200 t10t, tau11, /**/ t12t,
201 /* ************** */ /* ********************** */
202 &TBL, /**/ &TBR, T20, t21, /**/ T22,
203 FLA_TL );
205 upsilon11,
206 /* ** */ /* ********* */
207 &uB, u21, FLA_TOP );
209 zeta11,
210 /* ** */ /* ****** */
211 &zB, z21, FLA_TOP );
213 omega11,
214 /* ** */ /* ******* */
215 &wB, w21, FLA_TOP );
216 }
217
221 FLA_Obj_free( &beta );
226 FLA_Obj_free( &u );
227 FLA_Obj_free( &z );
228 FLA_Obj_free( &w );
229
230 return FLA_SUCCESS;
231}
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Axpyt.c:15
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition FLA_Copyt.c:15

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Hemv(), FLA_Her2(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_unb_var2().

◆ FLA_Tridiag_UT_l_step_unb_var3()

FLA_Error FLA_Tridiag_UT_l_step_unb_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
30{
31 FLA_Obj ATL, ATR, A00, a01, A02,
33 A20, a21, A22;
36 Z20, z21, Z22;
37 FLA_Obj TTL, TTR, T00, t01, T02,
38 TBL, TBR, t10t, tau11, t12t,
39 T20, t21, T22;
40 FLA_Obj dT, d01,
41 dB, delta11,
42 d21;
43 FLA_Obj fT, f01,
44 fB, phi11,
45 f21;
46 FLA_Obj d, f;
47
53
56 a21_b;
57 FLA_Obj a2;
58
60 dim_t m_A;
62
63
65
67 m_A = FLA_Obj_length( A );
68
69 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
71 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
72 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
73 FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
74 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
75 FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
76
77 FLA_Set( FLA_ZERO, Z );
78
79 FLA_Part_2x2( A, &ATL, &ATR,
80 &ABL, &ABR, 0, 0, FLA_TL );
81 FLA_Part_2x2( Z, &ZTL, &ZTR,
82 &ZBL, &ZBR, 0, 0, FLA_TL );
83 FLA_Part_2x2( T, &TTL, &TTR,
84 &TBL, &TBR, 0, 0, FLA_TL );
85 FLA_Part_2x1( d, &dT,
86 &dB, 0, FLA_TOP );
87 FLA_Part_2x1( f, &fT,
88 &fB, 0, FLA_TOP );
89
90 while ( FLA_Obj_length( ATL ) < b_alg )
91 {
92 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
93 /* ************* */ /* ************************** */
94 &a10t, /**/ &alpha11, &a12t,
95 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
96 1, 1, FLA_BR );
97 FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z011, &Z02,
98 /* ************* */ /* ************************* */
99 &z10t, /**/ &zeta11, &z12t,
100 ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
101 1, 1, FLA_BR );
102 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
103 /* ************* */ /* ************************ */
104 &t10t, /**/ &tau11, &t12t,
105 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
106 1, 1, FLA_BR );
108 /* ** */ /* ******* */
109 &delta11,
110 dB, &d21, 1, FLA_BOTTOM );
112 /* ** */ /* ***** */
113 &phi11,
114 fB, &f21, 1, FLA_BOTTOM );
115
116 /*------------------------------------------------------------*/
117
118 // Save last element of a10t and set it to one so we can use a10t as
119 // u10t in subsequent computations. We will restore that element later on.
120 if ( FLA_Obj_length( ATL ) > 0 )
121 {
125 }
126
128 a21, &a2 );
129
130 // alpha11 = alpha11 - u10t * z10t' - z10t * u10t';
131 // a21 = a21 - U20 * z10t' - Z20 * u10t';
134
135 // Restore last element of a10t.
136 if ( FLA_Obj_length( ATL ) > 0 )
137 {
139 }
140
141 if ( FLA_Obj_length( A22 ) > 0 )
142 {
144 &a21_b, 1, FLA_TOP );
145
146 // [ u21, tau11, a21 ] = House( a21 );
148 a21_t,
149 a21_b, tau11 );
150
151 // inv_tau11 = 1 / tau11;
152 // minus_inv_tau11 = -1 / tau11;
157
158 // Save first element of a21_t and set it to one.
161
162 // z21 = A22 * u21;
164
165 // z21 = z21 - U20 * ( Z20' * u21 ) - Z20 * ( U20' * u21 );
168
171
172 // t01 = U20' * u21;
173 FLA_Copy( d01, t01 );
174
175 // beta = u21' * z21 / 2;
178
179 // z21 = ( z21 - beta / tau * u21 ) / tau;
181 FLA_Axpy( beta, a21, z21 );
183
184 // Restore first element of a21.
186 }
187
188 /*------------------------------------------------------------*/
189
190 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
191 a10t, alpha11, /**/ a12t,
192 /* ************** */ /* ************************ */
193 &ABL, /**/ &ABR, A20, a21, /**/ A22,
194 FLA_TL );
195 FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z011, /**/ Z02,
196 z10t, zeta11, /**/ z12t,
197 /* ************** */ /* *********************** */
198 &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
199 FLA_TL );
200 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
201 t10t, tau11, /**/ t12t,
202 /* ************** */ /* ********************** */
203 &TBL, /**/ &TBR, T20, t21, /**/ T22,
204 FLA_TL );
206 delta11,
207 /* ** */ /* ******* */
208 &dB, d21, FLA_TOP );
210 phi11,
211 /* ** */ /* ***** */
212 &fB, f21, FLA_TOP );
213 }
214
217 FLA_Obj_free( &beta );
220 FLA_Obj_free( &d );
221 FLA_Obj_free( &f );
222
223 return FLA_SUCCESS;
224}
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemvc.c:13
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition FLA_View.c:541

References FLA_Axpy(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Hemv(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_unb_var3().

◆ FLA_Tridiag_UT_l_unb_var1()

FLA_Error FLA_Tridiag_UT_l_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16}
FLA_Error FLA_Tridiag_UT_l_step_unb_var1(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_unb_var1.c:18

References FLA_Tridiag_UT_l_step_unb_var1(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_unb_var2()

FLA_Error FLA_Tridiag_UT_l_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16}
FLA_Error FLA_Tridiag_UT_l_step_unb_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_Tridiag_UT_l_unb_var2.c:18

References FLA_Tridiag_UT_l_step_unb_var2(), and i.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_unb_var3()

FLA_Error FLA_Tridiag_UT_l_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14{
16 FLA_Obj Z;
17
18 //FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );
20
22
23 //FLA_Obj_free( &Y );
24 FLA_Obj_free( &Z );
25
26 return r_val;
27}
FLA_Error FLA_Tridiag_UT_l_step_unb_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition FLA_Tridiag_UT_l_unb_var3.c:29

References FLA_Obj_create_conf_to(), FLA_Obj_free(), FLA_Tridiag_UT_l_step_unb_var3(), and i.

Referenced by FLA_Tridiag_UT_l().