libflame revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var1 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var1 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var1 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var1 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var1()

FLA_Error FLA_Bidiag_UT_u_opt_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14{
15 return FLA_Bidiag_UT_u_step_opt_var1( A, TU, TV );
16}
FLA_Error FLA_Bidiag_UT_u_step_opt_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition FLA_Bidiag_UT_u_opt_var1.c:18
int i
Definition bl1_axmyv2.c:145

References FLA_Bidiag_UT_u_step_opt_var1(), and i.

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_step_opc_var1()

FLA_Error FLA_Bidiag_UT_u_step_opc_var1 ( int  m_A,
int  n_A,
int  m_TS,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T,
scomplex * buff_S,
int  rs_S,
int  cs_S 
)
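390{
391 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
392 scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
393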
394 int i;
395
396 // b_alg = FLA_Obj_length( T );
397 int b_alg = m_TS;
398
399 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
400 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
401 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
402 scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
403 int inc_v = 1;
404
405 for ( i = 0; i < b_alg; ++i )
406 {
407 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
408 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
409 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
410 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
411 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
412 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
413 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
414
415 scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
416 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
417
418 scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
419 scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
420
421 scomplex* v21 = buff_v + (i+1)*inc_v;
422
423 scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
424 scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
425
426 scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
427 scomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
428
429 scomplex* v21_t = v21 + (0 )*inc_v;
430 scomplex* v21_b = v21 + (1 )*inc_v;
431
432 int m_ahead = m_A - i - 1;
433 int n_ahead = n_A - i - 1;
434 int m_behind = i;
435 int n_behind = i;
436
437 /*------------------------------------------------------------*/
438
439 // FLA_Househ2_UT( FLA_LEFT,
440 // alpha11,
441 // a21, tau11 );
443 alpha11,
444 a21, rs_A,
445 tau11 );
446
447 if ( n_ahead > 0 )
448 {
449 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
451 n_ahead,
452 tau11,
453 a21, rs_A,
454 a12t, cs_A,
455 A22, rs_A, cs_A );
456
457 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
459 a12t_l,
460 a12t_r, cs_A,
461 sigma11 );
462
463 // FLA_Set( FLA_ONE, v21_t );
464 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
465 *v21_t = *buff_1;
467 n_ahead - 1,
468 a12t_r, cs_A,
469 v21_b, inc_v );
470
471 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
473 n_ahead - 1,
474 sigma11,
475 v21_b, inc_v,
476 A22_l, rs_A,
477 A22_r, rs_A, cs_A );
478
479 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
482 m_behind,
483 n_ahead,
484 buff_1,
485 A02, rs_A, cs_A,
486 v21, inc_v,
487 buff_0,
488 s01, rs_S );
489 }
490
491 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
492 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
494 n_behind,
495 a10t, cs_A,
496 t01, rs_T );
499 m_ahead,
500 n_behind,
501 buff_1,
502 A20, rs_A, cs_A,
503 a21, rs_A,
504 buff_1,
505 t01, rs_T );
506
507 /*------------------------------------------------------------*/
508
509 }
510
511 // FLA_Obj_free( &v );
512 FLA_free( buff_v );
513
514 return FLA_SUCCESS;
515}
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Apply_H2_UT_r_opc_var1(int n_u2h_A2, int m_a1, scomplex *tau, scomplex *u2h, int inc_u2h, scomplex *a1, int inc_a1, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:254
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
void FLA_free(void *ptr)
Definition FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:677
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_ccopyv(), bl1_cgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_opd_var1()

FLA_Error FLA_Bidiag_UT_u_step_opd_var1 ( int  m_A,
int  n_A,
int  m_TS,
double * buff_A,
int  rs_A,
int  cs_A,
double * buff_T,
int  rs_T,
int  cs_T,
double * buff_S,
int  rs_S,
int  cs_S 
)
255{
256 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
257 double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
258
259 int i;
260
261 // b_alg = FLA_Obj_length( T );
262 int b_alg = m_TS;
263
264 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
265 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
266 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
267 double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
268 int inc_v = 1;
269
270 for ( i = 0; i < b_alg; ++i )
271 {
272 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
273 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
274 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
275 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
276 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
277 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
278 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
279
280 double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
281 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
282
283 double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
284 double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
285
286 double* v21 = buff_v + (i+1)*inc_v;
287
288 double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
289 double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
290
291 double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
292 double* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
293
294 double* v21_t = v21 + (0 )*inc_v;
295 double* v21_b = v21 + (1 )*inc_v;
296
297 int m_ahead = m_A - i - 1;
298 int n_ahead = n_A - i - 1;
299 int m_behind = i;
300 int n_behind = i;
301
302 /*------------------------------------------------------------*/
303
304 // FLA_Househ2_UT( FLA_LEFT,
305 // alpha11,
306 // a21, tau11 );
308 alpha11,
309 a21, rs_A,
310 tau11 );
311
312 if ( n_ahead > 0 )
313 {
314 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
316 n_ahead,
317 tau11,
318 a21, rs_A,
319 a12t, cs_A,
320 A22, rs_A, cs_A );
321
322 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
324 a12t_l,
325 a12t_r, cs_A,
326 sigma11 );
327
328 // FLA_Set( FLA_ONE, v21_t );
329 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
330 *v21_t = *buff_1;
332 n_ahead - 1,
333 a12t_r, cs_A,
334 v21_b, inc_v );
335
336 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
338 n_ahead - 1,
339 sigma11,
340 v21_b, inc_v,
341 A22_l, rs_A,
342 A22_r, rs_A, cs_A );
343
344 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
347 m_behind,
348 n_ahead,
349 buff_1,
350 A02, rs_A, cs_A,
351 v21, inc_v,
352 buff_0,
353 s01, rs_S );
354 }
355
356 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
357 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
359 n_behind,
360 a10t, cs_A,
361 t01, rs_T );
364 m_ahead,
365 n_behind,
366 buff_1,
367 A20, rs_A, cs_A,
368 a21, rs_A,
369 buff_1,
370 t01, rs_T );
371
372 /*------------------------------------------------------------*/
373
374 }
375
376 // FLA_Obj_free( &v );
377 FLA_free( buff_v );
378
379 return FLA_SUCCESS;
380}
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:195
FLA_Error FLA_Apply_H2_UT_r_opd_var1(int n_u2h_A2, int m_a1, double *tau, double *u2h, int inc_u2h, double *a1, int inc_a1, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:181
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:664
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69

References bl1_dcopyv(), bl1_dgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_ops_var1()

FLA_Error FLA_Bidiag_UT_u_step_ops_var1 ( int  m_A,
int  n_A,
int  m_TS,
float * buff_A,
int  rs_A,
int  cs_A,
float * buff_T,
int  rs_T,
int  cs_T,
float * buff_S,
int  rs_S,
int  cs_S 
)
120{
121 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122 float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123
124 int i;
125
126 // b_alg = FLA_Obj_length( T );
127 int b_alg = m_TS;
128
129 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
130 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
131 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
132 float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
133 int inc_v = 1;
134
135 for ( i = 0; i < b_alg; ++i )
136 {
137 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
138 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
139 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
140 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
141 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
142 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
143 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
144
145 float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
146 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
147
148 float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
149 float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
150
151 float* v21 = buff_v + (i+1)*inc_v;
152
153 float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
154 float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
155
156 float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
157 float* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
158
159 float* v21_t = v21 + (0 )*inc_v;
160 float* v21_b = v21 + (1 )*inc_v;
161
162 int m_ahead = m_A - i - 1;
163 int n_ahead = n_A - i - 1;
164 int m_behind = i;
165 int n_behind = i;
166
167 /*------------------------------------------------------------*/
168
169 // FLA_Househ2_UT( FLA_LEFT,
170 // alpha11,
171 // a21, tau11 );
173 alpha11,
174 a21, rs_A,
175 tau11 );
176
177 if ( n_ahead > 0 )
178 {
179 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
181 n_ahead,
182 tau11,
183 a21, rs_A,
184 a12t, cs_A,
185 A22, rs_A, cs_A );
186
187 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
189 a12t_l,
190 a12t_r, cs_A,
191 sigma11 );
192
193 // FLA_Set( FLA_ONE, v21_t );
194 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
195 *v21_t = *buff_1;
197 n_ahead - 1,
198 a12t_r, cs_A,
199 v21_b, inc_v );
200
201 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
203 n_ahead - 1,
204 sigma11,
205 v21_b, inc_v,
206 A22_l, rs_A,
207 A22_r, rs_A, cs_A );
208
209 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
212 m_behind,
213 n_ahead,
214 buff_1,
215 A02, rs_A, cs_A,
216 v21, inc_v,
217 buff_0,
218 s01, rs_S );
219 }
220
221 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
222 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
224 n_behind,
225 a10t, cs_A,
226 t01, rs_T );
229 m_ahead,
230 n_behind,
231 buff_1,
232 A20, rs_A, cs_A,
233 a21, rs_A,
234 buff_1,
235 t01, rs_T );
236
237 /*------------------------------------------------------------*/
238
239 }
240
241 // FLA_Obj_free( &v );
242 FLA_free( buff_v );
243
244 return FLA_SUCCESS;
245}
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Apply_H2_UT_r_ops_var1(int n_u2h_A2, int m_a1, float *tau, float *u2h, int inc_u2h, float *a1, int inc_a1, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:108
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:651
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13

References bl1_scopyv(), bl1_sgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_opt_var1()

FLA_Error FLA_Bidiag_UT_u_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19{
20 FLA_Datatype datatype;
21 int m_A, n_A, m_TS;
22 int rs_A, cs_A;
23 int rs_T, cs_T;
24 int rs_S, cs_S;
25
26 datatype = FLA_Obj_datatype( A );
27
28 m_A = FLA_Obj_length( A );
29 n_A = FLA_Obj_width( A );
31
34
37
40
41
42 switch ( datatype )
43 {
44 case FLA_FLOAT:
45 {
46 float* buff_A = FLA_FLOAT_PTR( A );
47 float* buff_T = FLA_FLOAT_PTR( T );
48 float* buff_S = FLA_FLOAT_PTR( S );
49
51 n_A,
52 m_TS,
55 buff_S, rs_S, cs_S );
56
57 break;
58 }
59
60 case FLA_DOUBLE:
61 {
62 double* buff_A = FLA_DOUBLE_PTR( A );
63 double* buff_T = FLA_DOUBLE_PTR( T );
64 double* buff_S = FLA_DOUBLE_PTR( S );
65
67 n_A,
68 m_TS,
71 buff_S, rs_S, cs_S );
72
73 break;
74 }
75
76 case FLA_COMPLEX:
77 {
81
83 n_A,
84 m_TS,
87 buff_S, rs_S, cs_S );
88
89 break;
90 }
91
93 {
97
99 n_A,
100 m_TS,
101 buff_A, rs_A, cs_A,
102 buff_T, rs_T, cs_T,
103 buff_S, rs_S, cs_S );
104
105 break;
106 }
107 }
108
109 return FLA_SUCCESS;
110}
FLA_Error FLA_Bidiag_UT_u_step_opz_var1(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:519
FLA_Error FLA_Bidiag_UT_u_step_opc_var1(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:384
FLA_Error FLA_Bidiag_UT_u_step_opd_var1(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:249
FLA_Error FLA_Bidiag_UT_u_step_ops_var1(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition FLA_Bidiag_UT_u_opt_var1.c:114
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_Bidiag_UT_u_blk_var1(), and FLA_Bidiag_UT_u_opt_var1().

◆ FLA_Bidiag_UT_u_step_opz_var1()

FLA_Error FLA_Bidiag_UT_u_step_opz_var1 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
dcomplex * buff_T,
int  rs_T,
int  cs_T,
dcomplex * buff_S,
int  rs_S,
int  cs_S 
)
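525{
526 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
527 dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
528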
529 int i;
530
531 // b_alg = FLA_Obj_length( T );
532 int b_alg = m_TS;
533
534 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
535 // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
536 // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
537 dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
538 int inc_v = 1;
539
540 for ( i = 0; i < b_alg; ++i )
541 {
542 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
543 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
544 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
545 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
546 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
547 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
548 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
549
550 dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
551 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
552
553 dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
554 dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
555
556 dcomplex* v21 = buff_v + (i+1)*inc_v;
557
558 dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
559 dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
560
561 dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
562 dcomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
563
564 dcomplex* v21_t = v21 + (0 )*inc_v;
565 dcomplex* v21_b = v21 + (1 )*inc_v;
566
567 int m_ahead = m_A - i - 1;
568 int n_ahead = n_A - i - 1;
569 int m_behind = i;
570 int n_behind = i;
571
572 /*------------------------------------------------------------*/
573
574 // FLA_Househ2_UT( FLA_LEFT,
575 // alpha11,
576 // a21, tau11 );
578 alpha11,
579 a21, rs_A,
580 tau11 );
581
582 if ( n_ahead > 0 )
583 {
584 // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
586 n_ahead,
587 tau11,
588 a21, rs_A,
589 a12t, cs_A,
590 A22, rs_A, cs_A );
591
592 // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
594 a12t_l,
595 a12t_r, cs_A,
596 sigma11 );
597
598 // FLA_Set( FLA_ONE, v21_t );
599 // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
600 *v21_t = *buff_1;
602 n_ahead - 1,
603 a12t_r, cs_A,
604 v21_b, inc_v );
605
606 // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
608 n_ahead - 1,
609 sigma11,
610 v21_b, inc_v,
611 A22_l, rs_A,
612 A22_r, rs_A, cs_A );
613
614 // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
617 m_behind,
618 n_ahead,
619 buff_1,
620 A02, rs_A, cs_A,
621 v21, inc_v,
622 buff_0,
623 s01, rs_S );
624 }
625
626 // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
627 // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
629 n_behind,
630 a10t, cs_A,
631 t01, rs_T );
634 m_ahead,
635 n_behind,
636 buff_1,
637 A20, rs_A, cs_A,
638 a21, rs_A,
639 buff_1,
640 t01, rs_T );
641
642 /*------------------------------------------------------------*/
643
644 }
645
646 // FLA_Obj_free( &v );
647 FLA_free( buff_v );
648
649 return FLA_SUCCESS;
650}
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:343
FLA_Error FLA_Apply_H2_UT_r_opz_var1(int n_u2h_A2, int m_a1, dcomplex *tau, dcomplex *u2h, int inc_u2h, dcomplex *a1, int inc_a1, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_r_opt_var1.c:327
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:693
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255

References bl1_zcopyv(), bl1_zgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().