libflame revision_anchor
Functions
FLA_Fused_UZhu_ZUhu_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 (FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_UZhu_ZUhu_opc_var1()

/*
   Single-precision complex kernel, one column of U and Z per iteration.

   For each column j (u1 = U(:,j), z1 = Z(:,j)):
     alpha = dot( z1, u )
     beta  = dot( u1, u )
     t(j)  = beta                  (stored before scaling by delta)
     w     = w + (delta*alpha) * u1 + (delta*beta) * z1

   m_U, n_U         : number of rows / columns of U (Z is walked with the
                      same extents).
   buff_delta       : pointer to the scalar delta.
   buff_U,rs_U,cs_U : U and its row / column strides.
   buff_Z,rs_Z,cs_Z : Z and its row / column strides.
   buff_t,inc_t     : output vector t (n_U entries are written).
   buff_u,inc_u     : input vector u (m_U entries are read).
   buff_w,inc_w     : vector w, updated in place.

   Always returns FLA_SUCCESS.

   NOTE(review): the conjugation flags passed to bl1_cdot / bl1_caxpyv and
   the scaling of alpha by delta follow the sibling real-precision variant
   and the "U ( Z' u ) + Z ( U' u )" description of the operation -- confirm
   against the upstream source.
*/
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1( int m_U,
                                        int n_U,
                                        scomplex* buff_delta,
                                        scomplex* buff_U, int rs_U, int cs_U,
                                        scomplex* buff_Z, int rs_Z, int cs_Z,
                                        scomplex* buff_t, int inc_t,
                                        scomplex* buff_u, int inc_u,
                                        scomplex* buff_w, int inc_w )
{
  // These do not depend on the column index, so set them up once.
  scomplex* delta = buff_delta;
  scomplex* u     = buff_u;
  scomplex* w     = buff_w;
  int       j;

  for ( j = 0; j < n_U; ++j )
  {
    scomplex* u1   = buff_U + j*cs_U;
    scomplex* z1   = buff_Z + j*cs_Z;
    scomplex* tau1 = buff_t + j*inc_t;
    scomplex  alpha;
    scomplex  beta;

    /*------------------------------------------------------------*/

    // alpha = z1' * u
    bl1_cdot( BLIS1_CONJUGATE,
              m_U,
              z1, rs_Z,
              u,  inc_u,
              &alpha );

    // beta = u1' * u
    bl1_cdot( BLIS1_CONJUGATE,
              m_U,
              u1, rs_U,
              u,  inc_u,
              &beta );

    // t receives the unscaled inner product.
    *tau1 = beta;

    bl1_cscals( delta, &alpha );
    bl1_cscals( delta, &beta );

    // w = w + alpha * u1
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &alpha,
                u1, rs_U,
                w,  inc_w );

    // w = w + beta * z1
    // z1 is a column of Z, so it must be traversed with Z's row stride
    // (rs_Z), not U's.
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &beta,
                z1, rs_Z,
                w,  inc_w );

    /*------------------------------------------------------------*/
  }

  return FLA_SUCCESS;
}
int i
Definition bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
double *restrict z1
Definition bl1_dotsv2.c:148
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofc_var3().

◆ FLA_Fused_UZhu_ZUhu_opd_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 ( int  m_U,
int  n_U,
double buff_delta,
double buff_U,
int  rs_U,
int  cs_U,
double buff_Z,
int  rs_Z,
int  cs_Z,
double buff_t,
int  inc_t,
double buff_u,
int  inc_u,
double buff_w,
int  inc_w 
)
230{
231 double zero = bl1_d0();
232
233 int n_run = n_U / 2;
234 int n_left = n_U % 2;
235 int step_u = 2*cs_U;
236 int step_z = 2*cs_Z;
237 int step_tau = 2*inc_t;
238 int i;
239
240 double* u = buff_u;
241 double* w = buff_w;
242 //double* delta = buff_delta;
243
244 double* u1;
245 double* u2;
246 double* u3;
247 double* z1;
248 double* z2;
249 double* z3;
250 double* tau1;
251 double* tau2;
252 double* tau3;
253
254 u1 = buff_U;
255 u2 = buff_U + cs_U;
256 u3 = buff_U + 2*cs_U;
257 z1 = buff_Z;
258 z2 = buff_Z + cs_Z;
259 z3 = buff_Z + 2*cs_Z;
260 tau1 = buff_t;
261 tau2 = buff_t + inc_t;
262 tau3 = buff_t + 2*inc_t;
263
264 for ( i = 0; i < n_run; ++i )
265 {
266 double rho_z1u;
267 double rho_z2u;
268 //double rho_z3u;
269 double rho_u1u;
270 double rho_u2u;
271 //double rho_u3u;
272
273 /*------------------------------------------------------------*/
274/*
275 bl1_ddotsv3( BLIS1_CONJUGATE,
276 m_U,
277 z1, rs_Z,
278 z2, rs_Z,
279 z3, rs_Z,
280 u, inc_u,
281 &zero,
282 &rho_z1u,
283 &rho_z2u,
284 &rho_z3u );
285 bl1_dneg1( &rho_z1u );
286 bl1_dneg1( &rho_z2u );
287 bl1_dneg1( &rho_z3u );
288
289 bl1_ddotv2axpyv2b( m_U,
290 u1, rs_U,
291 u2, rs_U,
292 u, inc_u,
293 &rho_z1u,
294 &rho_z2u,
295 &rho_u1u,
296 &rho_u2u,
297 w, inc_w );
298 bl1_ddotaxpy( m_U,
299 u3, rs_U,
300 u, inc_u,
301 &rho_z3u,
302 &rho_u3u,
303 w, inc_w );
304
305 *tau1 = rho_u1u;
306 *tau2 = rho_u2u;
307 *tau3 = rho_u3u;
308
309 bl1_dneg1( &rho_u1u );
310 bl1_dneg1( &rho_u2u );
311 bl1_dneg1( &rho_u3u );
312
313 bl1_daxpyv3b( m_U,
314 &rho_u1u,
315 &rho_u2u,
316 &rho_u3u,
317 z1, rs_Z,
318 z2, rs_Z,
319 z3, rs_Z,
320 w, inc_w );
321*/
323 m_U,
324 z1, rs_Z,
325 z2, rs_Z,
326 u, inc_u,
327 &zero,
328 &rho_z1u,
329 &rho_z2u );
330 bl1_dneg1( &rho_z1u );
331 bl1_dneg1( &rho_z2u );
332
334 u1, rs_U,
335 u2, rs_U,
336 u, inc_u,
337 &rho_z1u,
338 &rho_z2u,
339 &rho_u1u,
340 &rho_u2u,
341 w, inc_w );
342
343 *tau1 = rho_u1u;
344 *tau2 = rho_u2u;
345
346 bl1_dneg1( &rho_u1u );
347 bl1_dneg1( &rho_u2u );
348
350 &rho_u1u,
351 &rho_u2u,
352 z1, rs_Z,
353 z2, rs_Z,
354 w, inc_w );
355
356
357 /*------------------------------------------------------------*/
358
359 u1 += step_u;
360 u2 += step_u;
361 u3 += step_u;
362 z1 += step_z;
363 z2 += step_z;
364 z3 += step_z;
365 tau1 += step_tau;
366 tau2 += step_tau;
367 tau3 += step_tau;
368 }
369
370 if ( n_left > 0 )
371 {
372 for ( i = 0; i < n_left; ++i )
373 {
374 double rho_z1u;
375 double rho_u1u;
376
378 m_U,
379 z1, rs_Z,
380 u, inc_u,
381 &rho_z1u );
382 bl1_dneg1( &rho_z1u );
383
385 u1, rs_U,
386 u, inc_u,
387 &rho_z1u,
388 &rho_u1u,
389 w, inc_w );
390
391 *tau1 = rho_u1u;
392
393 bl1_dneg1( &rho_u1u );
395 m_U,
396 &rho_u1u,
397 z1, rs_Z,
398 w, inc_w );
399
400 u1 += cs_U;
401 z1 += cs_Z;
402 tau1 += inc_t;
403 }
404 }
405
406 return FLA_SUCCESS;
407}
int n_left
Definition bl1_axmyv2.c:149
int n_run
Definition bl1_axmyv2.c:148
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition bl1_axpyv2b.c:31
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition bl1_dot.c:26
void bl1_ddotaxpy(int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition bl1_dotaxpy.c:31
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition bl1_dotsv2.c:35
void bl1_ddotv2axpyv2b(int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
Definition bl1_dotv2axpyv2b.c:36
double bl1_d0(void)
Definition bl1_constants.c:118

References bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotaxpy(), bl1_ddotsv2(), bl1_ddotv2axpyv2b(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofd_var3().

◆ FLA_Fused_UZhu_ZUhu_ops_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 ( int  m_U,
int  n_U,
float buff_delta,
float buff_U,
int  rs_U,
int  cs_U,
float buff_Z,
int  rs_Z,
int  cs_Z,
float buff_t,
int  inc_t,
float buff_u,
int  inc_u,
float buff_w,
int  inc_w 
)
144{
145 int i;
146
147 for ( i = 0; i < n_U; ++i )
148 {
149 float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
150 float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
151 float* delta = buff_delta;
152 float* tau1 = buff_t + (i )*inc_t;
153 float* u = buff_u;
154 float* w = buff_w;
155 float alpha;
156 float beta;
157
158 /*------------------------------------------------------------*/
159
161 m_U,
162 z1, rs_Z,
163 u, inc_u,
164 &alpha );
165/*
166 alpha = F77_sdot( &m_U,
167 z1, &rs_Z,
168 u, &inc_u );
169*/
170
172 m_U,
173 u1, rs_U,
174 u, inc_u,
175 &beta );
176/*
177 beta = F77_sdot( &m_U,
178 u1, &rs_U,
179 u, &inc_u );
180*/
181
182 *tau1 = beta;
183
184 // bl1_sscals( delta, &alpha );
185 // bl1_sscals( delta, &beta );
186 alpha *= *delta;
187 beta *= *delta;
188
190 m_U,
191 &alpha,
192 u1, rs_U,
193 w, inc_w );
194/*
195 F77_saxpy( &m_U,
196 &alpha,
197 u1, &rs_U,
198 w, &inc_w );
199*/
200
202 m_U,
203 &beta,
204 z1, rs_U,
205 w, inc_w );
206/*
207 F77_saxpy( &m_U,
208 &beta,
209 z1, &rs_Z,
210 w, &inc_w );
211*/
212
213 /*------------------------------------------------------------*/
214
215 }
216
217 return FLA_SUCCESS;
218}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13

References bl1_saxpyv(), bl1_sdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofs_var3().

◆ FLA_Fused_UZhu_ZUhu_opt_var1()

/*
   Object-based front end: reads dimensions, strides and buffers from the
   FLA_Obj operands and dispatches to the kernel matching U's datatype.

   Effective computation:
     w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
     t = U' u;

   delta : scalar object.
   U, Z  : matrix objects; the dimensions are taken from U only.
   t     : vector object receiving U' u.
   u     : input vector object.
   w     : vector object updated in place.

   Returns FLA_SUCCESS. A datatype that matches none of the cases below
   falls through the switch without doing any work.

   NOTE(review): the stride queries, the complex pointer-extraction macros
   (FLA_COMPLEX_PTR / FLA_DOUBLE_COMPLEX_PTR) and the FLA_DOUBLE_COMPLEX
   case label are written by analogy with the real-precision cases and the
   routines this function references -- confirm the exact names against
   the library headers.
*/
FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1( FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj w )
{
  FLA_Datatype datatype;
  int          m_U, n_U;
  int          rs_U, cs_U;
  int          rs_Z, cs_Z;
  int          inc_u, inc_w, inc_t;

  datatype = FLA_Obj_datatype( U );

  m_U      = FLA_Obj_length( U );
  n_U      = FLA_Obj_width( U );

  rs_U     = FLA_Obj_row_stride( U );
  cs_U     = FLA_Obj_col_stride( U );

  rs_Z     = FLA_Obj_row_stride( Z );
  cs_Z     = FLA_Obj_col_stride( Z );

  inc_u    = FLA_Obj_vector_inc( u );

  inc_w    = FLA_Obj_vector_inc( w );

  inc_t    = FLA_Obj_vector_inc( t );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_U     = FLA_FLOAT_PTR( U );
      float* buff_Z     = FLA_FLOAT_PTR( Z );
      float* buff_t     = FLA_FLOAT_PTR( t );
      float* buff_u     = FLA_FLOAT_PTR( u );
      float* buff_w     = FLA_FLOAT_PTR( w );
      float* buff_delta = FLA_FLOAT_PTR( delta );

      FLA_Fused_UZhu_ZUhu_ops_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_U     = FLA_DOUBLE_PTR( U );
      double* buff_Z     = FLA_DOUBLE_PTR( Z );
      double* buff_t     = FLA_DOUBLE_PTR( t );
      double* buff_u     = FLA_DOUBLE_PTR( u );
      double* buff_w     = FLA_DOUBLE_PTR( w );
      double* buff_delta = FLA_DOUBLE_PTR( delta );

      FLA_Fused_UZhu_ZUhu_opd_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_U     = FLA_COMPLEX_PTR( U );
      scomplex* buff_Z     = FLA_COMPLEX_PTR( Z );
      scomplex* buff_t     = FLA_COMPLEX_PTR( t );
      scomplex* buff_u     = FLA_COMPLEX_PTR( u );
      scomplex* buff_w     = FLA_COMPLEX_PTR( w );
      scomplex* buff_delta = FLA_COMPLEX_PTR( delta );

      // The kernel expects (t, inc_t) before (u, inc_u), exactly like the
      // other three precisions; passing them the other way round would
      // make it read t as the input vector and overwrite u.
      FLA_Fused_UZhu_ZUhu_opc_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_U     = FLA_DOUBLE_COMPLEX_PTR( U );
      dcomplex* buff_Z     = FLA_DOUBLE_COMPLEX_PTR( Z );
      dcomplex* buff_t     = FLA_DOUBLE_COMPLEX_PTR( t );
      dcomplex* buff_u     = FLA_DOUBLE_COMPLEX_PTR( u );
      dcomplex* buff_w     = FLA_DOUBLE_COMPLEX_PTR( w );
      dcomplex* buff_delta = FLA_DOUBLE_COMPLEX_PTR( delta );

      FLA_Fused_UZhu_ZUhu_opz_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:222
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:411
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:136
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
Definition FLA_Fused_UZhu_ZUhu_opt_var1.c:485
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_UZhu_ZUhu_opz_var1()

/*
   Double-precision complex kernel, one column of U and Z per iteration
   via bl1_zdot, the fused bl1_zdotaxpy, and bl1_zaxpyv.

   Effective computation:
     w = w + delta * ( U ( Z' u ) + Z ( U' u ) );

   m_U, n_U         : number of rows / columns of U (Z is walked with the
                      same extents).
   buff_delta       : accepted for interface compatibility but never read.
                      The scalars are passed through bl1_zneg1 instead,
                      which presumably negates in place (i.e. the kernel
                      behaves as if delta = -1) -- TODO confirm.
   buff_U,rs_U,cs_U : U and its row / column strides.
   buff_Z,rs_Z,cs_Z : Z and its row / column strides.
   buff_t,inc_t     : output vector t; entry j receives the value the
                      fused routine returns for column j of U.
   buff_u,inc_u     : input vector u.
   buff_w,inc_w     : vector w, updated in place.

   Always returns FLA_SUCCESS.

   NOTE(review): the exact arithmetic performed by bl1_zdotaxpy is not
   visible here; the per-call comment below is read off its parameter
   names. The conjugation flags on bl1_zdot / bl1_zaxpyv are inferred from
   the Z' u / U' u form of the operation -- confirm against the upstream
   source.
*/
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1( int m_U,
                                        int n_U,
                                        dcomplex* buff_delta,
                                        dcomplex* buff_U, int rs_U, int cs_U,
                                        dcomplex* buff_Z, int rs_Z, int cs_Z,
                                        dcomplex* buff_t, int inc_t,
                                        dcomplex* buff_u, int inc_u,
                                        dcomplex* buff_w, int inc_w )
{
  dcomplex* u = buff_u;
  dcomplex* w = buff_w;
  int       j;

  // The unroll factor is 1, so every column is handled by the same
  // single-column step and there is never a remainder to clean up.
  for ( j = 0; j < n_U; ++j )
  {
    dcomplex* u1   = buff_U + j*cs_U;
    dcomplex* z1   = buff_Z + j*cs_Z;
    dcomplex* tau1 = buff_t + j*inc_t;
    dcomplex  rho_z1u;
    dcomplex  rho_u1u;

    /*------------------------------------------------------------*/

    // rho_z1u = z1' * u
    bl1_zdot( BLIS1_CONJUGATE,
              m_U,
              z1, rs_Z,
              u,  inc_u,
              &rho_z1u );
    bl1_zneg1( &rho_z1u );

    // Fused step: produces rho_u1u from u1 and u, and updates w using
    // rho_z1u as the kappa scalar.
    bl1_zdotaxpy( m_U,
                  u1, rs_U,
                  u,  inc_u,
                  &rho_z1u,
                  &rho_u1u,
                  w,  inc_w );

    // t receives the value before it is passed through bl1_zneg1.
    *tau1 = rho_u1u;

    bl1_zneg1( &rho_u1u );

    // w = w + rho_u1u * z1
    bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &rho_u1u,
                z1, rs_Z,
                w,  inc_w );

    /*------------------------------------------------------------*/
  }

  return FLA_SUCCESS;
}
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition bl1_dot.c:65
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition bl1_dotaxpy.c:258

References bl1_zaxpyv(), bl1_zdot(), bl1_zdotaxpy(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofz_var3().