libflame revision_anchor
Functions
FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opt_var1 (FLA_Obj delta, FLA_Obj U, FLA_Obj Y, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj y, FLA_Obj z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
 

Function Documentation

◆ FLA_Fused_Uhu_Yhu_Zhu_opc_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1 ( int  m_U,
int  n_U,
scomplex * buff_delta,
scomplex * buff_U,
int  rs_U,
int  cs_U,
scomplex * buff_Y,
int  rs_Y,
int  cs_Y,
scomplex * buff_Z,
int  rs_Z,
int  cs_Z,
scomplex * buff_t,
int  inc_t,
scomplex * buff_u,
int  inc_u,
scomplex * buff_y,
int  inc_y,
scomplex * buff_z,
int  inc_z 
)
408{
409 int i;
410
411 for ( i = 0; i < n_U; ++i )
412 {
413 scomplex* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
414 scomplex* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
415 scomplex* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
417 scomplex* tau1 = buff_t + (i )*inc_t;
418 scomplex* u = buff_u;
419 scomplex* y = buff_y;
420 scomplex* z = buff_z;
424
425 /*------------------------------------------------------------*/
426
428 m_U,
429 u1, rs_U,
430 u, inc_u,
431 &alpha );
432
434 m_U,
435 z1, rs_Z,
436 u, inc_u,
437 &beta );
438
440 m_U,
441 y1, rs_Y,
442 u, inc_u,
443 &gamma );
444
445 *tau1 = alpha;
446
448 bl1_cscals( delta, &beta );
450
452 m_U,
453 &alpha,
454 y1, rs_Y,
455 y, inc_y );
456 //F77_caxpy( &m_U,
457 // &alpha,
458 // y1, &rs_Y,
459 // y, &inc_y );
460
462 m_U,
463 &beta,
464 u1, rs_U,
465 y, inc_y );
466 //F77_caxpy( &m_U,
467 // &beta,
468 // u1, &rs_U,
469 // y, &inc_y );
470
472 m_U,
473 &alpha,
474 z1, rs_Z,
475 z, inc_z );
476 //F77_caxpy( &m_U,
477 // &alpha,
478 // z1, &rs_Z,
479 // z, &inc_z );
480
482 m_U,
483 &gamma,
484 u1, rs_U,
485 z, inc_z );
486 //F77_caxpy( &m_U,
487 // &gamma,
488 // u1, &rs_U,
489 // z, &inc_z );
490
491 /*------------------------------------------------------------*/
492
493 }
494
495 return FLA_SUCCESS;
496}
int i
Definition bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition bl1_dot.c:39
double *restrict z1
Definition bl1_dotsv2.c:148
double *restrict y1
Definition bl1_dotsv2.c:147
@ BLIS1_CONJUGATE
Definition blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofc_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_opd_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1 ( int  m_U,
int  n_U,
double * buff_delta,
double * buff_U,
int  rs_U,
int  cs_U,
double * buff_Y,
int  rs_Y,
int  cs_Y,
double * buff_Z,
int  rs_Z,
int  cs_Z,
double * buff_t,
int  inc_t,
double * buff_u,
int  inc_u,
double * buff_y,
int  inc_y,
double * buff_z,
int  inc_z 
)
280{
281 double zero = bl1_d0();
282
283 double* restrict delta = buff_delta;
284 double* restrict u = buff_u;
285 double* restrict y = buff_y;
286 double* restrict z = buff_z;
287
288 double* restrict u1;
289 double* restrict y1;
290 double* restrict z1;
291 double* restrict upsilon1;
292 double* restrict tau1;
293
294 double alpha;
295 double beta;
296 double gamma;
297
298 int i;
299
300 int n_run = n_U / 1;
301 //int n_left = n_U % 1;
302 int step_u1 = 1*cs_U;
303 int step_y1 = 1*cs_Y;
304 int step_z1 = 1*cs_Z;
305 int step_upsilon1 = 1*inc_u;
306 int step_tau1 = 1*inc_t;
307
308 u1 = buff_U;
309 y1 = buff_Y;
310 z1 = buff_Z;
312 tau1 = buff_t;
313
314 for ( i = 0; i < n_run; ++i )
315 {
316 /*------------------------------------------------------------*/
317
318/*
319 bl1_ddotsv3( BLIS1_CONJUGATE,
320 m_U,
321 u1, rs_U,
322 z1, rs_Z,
323 y1, rs_Y,
324 u, inc_u,
325 &zero,
326 &alpha,
327 &beta,
328 &gamma );
329
330 *tau1 = alpha;
331
332 bl1_dscals( delta, &alpha );
333 bl1_dscals( delta, &beta );
334 bl1_dscals( delta, &gamma );
335
336 bl1_daxpyv2b( m_U,
337 &alpha,
338 &beta,
339 y1, rs_Y,
340 u1, rs_U,
341 y, inc_y );
342 bl1_daxpyv2b( m_U,
343 &alpha,
344 &gamma,
345 z1, rs_Z,
346 u1, rs_U,
347 z, inc_z );
348*/
349
351 m_U,
352 y1, rs_Y,
353 z1, rs_Z,
354 u, inc_u,
355 &zero,
356 &beta,
357 &gamma );
358
360 &gamma,
361 &beta,
362 u1, rs_U,
363 u, inc_u,
364 &alpha,
365 y, inc_y,
366 z, inc_z );
367
368 *tau1 = alpha;
369
372 m_U,
373 &alpha,
374 y1, rs_Y,
375 y, inc_y );
377 m_U,
378 &alpha,
379 z1, rs_Z,
380 z, inc_z );
381
382
383 /*------------------------------------------------------------*/
384
385 u1 += step_u1;
386 y1 += step_y1;
387 z1 += step_z1;
389 tau1 += step_tau1;
390 }
391
392
393 return FLA_SUCCESS;
394}
int n_run
Definition bl1_axmyv2.c:148
upsilon1
Definition bl1_axpyv2bdotaxpy.c:225
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition bl1_axpyv.c:21
void bl1_ddotaxmyv2(int n, double *alpha, double *beta, double *x, int inc_x, double *u, int inc_u, double *rho, double *y, int inc_y, double *z, int inc_z)
Definition bl1_dotaxmyv2.c:34
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition bl1_dotsv2.c:35
double bl1_d0(void)
Definition bl1_constants.c:118

References bl1_d0(), bl1_daxpyv(), bl1_ddotaxmyv2(), bl1_ddotsv2(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_run, upsilon1, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofd_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_ops_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1 ( int  m_U,
int  n_U,
float * buff_delta,
float * buff_U,
int  rs_U,
int  cs_U,
float * buff_Y,
int  rs_Y,
int  cs_Y,
float * buff_Z,
int  rs_Z,
int  cs_Z,
float * buff_t,
int  inc_t,
float * buff_u,
int  inc_u,
float * buff_y,
int  inc_y,
float * buff_z,
int  inc_z 
)
166{
167 int i;
168
169 for ( i = 0; i < n_U; ++i )
170 {
171 float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
172 float* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
173 float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
174 float* delta = buff_delta;
175 float* tau1 = buff_t + (i )*inc_t;
176 float* u = buff_u;
177 float* y = buff_y;
178 float* z = buff_z;
179 float alpha;
180 float beta;
181 float gamma;
182
183 /*------------------------------------------------------------*/
184
186 m_U,
187 u1, rs_U,
188 u, inc_u,
189 &alpha );
190 //alpha = F77_sdot( &m_U,
191 // u1, &rs_U,
192 // u, &inc_u );
193
195 m_U,
196 z1, rs_Z,
197 u, inc_u,
198 &beta );
199 //beta = F77_sdot( &m_U,
200 // z1, &rs_Z,
201 // u, &inc_u );
202
204 m_U,
205 y1, rs_Y,
206 u, inc_u,
207 &gamma );
208 //gamma = F77_sdot( &m_U,
209 // y1, &rs_Y,
210 // u, &inc_u );
211
212 *tau1 = alpha;
213
214 // bl1_sscals( delta, &alpha );
215 // bl1_sscals( delta, &beta );
216 // bl1_sscals( delta, &gamma );
217 alpha *= *delta;
218 beta *= *delta;
219 gamma *= *delta;
220
222 m_U,
223 &alpha,
224 y1, rs_Y,
225 y, inc_y );
226 //F77_saxpy( &m_U,
227 // &alpha,
228 // y1, &rs_Y,
229 // y, &inc_y );
230
232 m_U,
233 &beta,
234 u1, rs_U,
235 y, inc_y );
236 //F77_saxpy( &m_U,
237 // &beta,
238 // u1, &rs_U,
239 // y, &inc_y );
240
242 m_U,
243 &alpha,
244 z1, rs_Z,
245 z, inc_z );
246 //F77_saxpy( &m_U,
247 // &alpha,
248 // z1, &rs_Z,
249 // z, &inc_z );
250
252 m_U,
253 &gamma,
254 u1, rs_U,
255 z, inc_z );
256 //F77_saxpy( &m_U,
257 // &gamma,
258 // u1, &rs_U,
259 // z, &inc_z );
260
261 /*------------------------------------------------------------*/
262
263 }
264
265 return FLA_SUCCESS;
266}
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition bl1_dot.c:13

References bl1_saxpyv(), bl1_sdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofs_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_opt_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opt_var1 ( FLA_Obj  delta,
FLA_Obj  U,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  t,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z 
)
14{
15/*
16 Effective computation:
17 y = y + delta * ( Y ( U' u ) + U ( Z' u ) );
18 z = z + delta * ( U ( Y' u ) + Z ( U' u ) );
19 t = U' u;
20*/
21 FLA_Datatype datatype;
22 int m_U, n_U;
23 int rs_U, cs_U;
24 int rs_Y, cs_Y;
25 int rs_Z, cs_Z;
26 int inc_u, inc_y, inc_z, inc_t;
27
28 datatype = FLA_Obj_datatype( U );
29
30 m_U = FLA_Obj_length( U );
31 n_U = FLA_Obj_width( U );
32
35
38
41
46
47
48 switch ( datatype )
49 {
50 case FLA_FLOAT:
51 {
52 float* buff_U = FLA_FLOAT_PTR( U );
53 float* buff_Y = FLA_FLOAT_PTR( Y );
54 float* buff_Z = FLA_FLOAT_PTR( Z );
55 float* buff_t = FLA_FLOAT_PTR( t );
56 float* buff_u = FLA_FLOAT_PTR( u );
57 float* buff_y = FLA_FLOAT_PTR( y );
58 float* buff_z = FLA_FLOAT_PTR( z );
59 float* buff_delta = FLA_FLOAT_PTR( delta );
60
62 n_U,
70 buff_z, inc_z );
71
72 break;
73 }
74
75 case FLA_DOUBLE:
76 {
77 double* buff_U = FLA_DOUBLE_PTR( U );
78 double* buff_Y = FLA_DOUBLE_PTR( Y );
79 double* buff_Z = FLA_DOUBLE_PTR( Z );
80 double* buff_t = FLA_DOUBLE_PTR( t );
81 double* buff_u = FLA_DOUBLE_PTR( u );
82 double* buff_y = FLA_DOUBLE_PTR( y );
83 double* buff_z = FLA_DOUBLE_PTR( z );
84 double* buff_delta = FLA_DOUBLE_PTR( delta );
85
87 n_U,
95 buff_z, inc_z );
96
97 break;
98 }
99
100 case FLA_COMPLEX:
101 {
110
112 n_U,
114 buff_U, rs_U, cs_U,
115 buff_Y, rs_Y, cs_Y,
116 buff_Z, rs_Z, cs_Z,
117 buff_t, inc_t,
118 buff_u, inc_u,
119 buff_y, inc_y,
120 buff_z, inc_z );
121
122 break;
123 }
124
126 {
135
137 n_U,
139 buff_U, rs_U, cs_U,
140 buff_Y, rs_Y, cs_Y,
141 buff_Z, rs_Z, cs_Z,
142 buff_t, inc_t,
143 buff_u, inc_u,
144 buff_y, inc_y,
145 buff_z, inc_z );
146
147 break;
148 }
149 }
150
151 return FLA_SUCCESS;
152}
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:398
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:270
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:156
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
Definition FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:500
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_ops_var1(), FLA_Fused_Uhu_Yhu_Zhu_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

◆ FLA_Fused_Uhu_Yhu_Zhu_opz_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1 ( int  m_U,
int  n_U,
dcomplex * buff_delta,
dcomplex * buff_U,
int  rs_U,
int  cs_U,
dcomplex * buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex * buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex * buff_t,
int  inc_t,
dcomplex * buff_u,
int  inc_u,
dcomplex * buff_y,
int  inc_y,
dcomplex * buff_z,
int  inc_z 
)
510{
511 dcomplex zero = bl1_z0();
512
517
523
527
528 int i;
529
530 int n_run = n_U / 1;
531 //int n_left = n_U % 1;
532 int step_u1 = 1*cs_U;
533 int step_y1 = 1*cs_Y;
534 int step_z1 = 1*cs_Z;
535 int step_upsilon1 = 1*inc_u;
536 int step_tau1 = 1*inc_t;
537
538 u1 = buff_U;
539 y1 = buff_Y;
540 z1 = buff_Z;
542 tau1 = buff_t;
543
544 for ( i = 0; i < n_run; ++i )
545 {
546 /*------------------------------------------------------------*/
547
548
550 m_U,
551 u1, rs_U,
552 z1, rs_Z,
553 y1, rs_Y,
554 u, inc_u,
555 &zero,
556 &alpha,
557 &beta,
558 &gamma );
559
560 *tau1 = alpha;
561
563 bl1_zscals( delta, &beta );
565
567 &alpha,
568 &beta,
569 y1, rs_Y,
570 u1, rs_U,
571 y, inc_y );
573 &alpha,
574 &gamma,
575 z1, rs_Z,
576 u1, rs_U,
577 z, inc_z );
578
579
580/*
581 bl1_zdotsv2( BLIS1_CONJUGATE,
582 m_U,
583 y1, rs_Y,
584 z1, rs_Z,
585 u, inc_u,
586 &zero,
587 &beta,
588 &gamma );
589
590 bl1_zdotaxmyv2( m_U,
591 &gamma,
592 &beta,
593 u1, rs_U,
594 u, inc_u,
595 &alpha,
596 y, inc_y,
597 z, inc_z );
598
599 *tau1 = alpha;
600
601 bl1_zscals( delta, &alpha );
602 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
603 m_U,
604 &alpha,
605 y1, rs_Y,
606 y, inc_y );
607 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
608 m_U,
609 &alpha,
610 z1, rs_Z,
611 z, inc_z );
612*/
613
614 /*------------------------------------------------------------*/
615
616 u1 += step_u1;
617 y1 += step_y1;
618 z1 += step_z1;
620 tau1 += step_tau1;
621 }
622
623 return FLA_SUCCESS;
624}
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition bl1_axpyv2b.c:210
bl1_zscals(beta, rho_yz)
void bl1_zdotsv3(conj1_t conjxyw, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *w, int inc_w, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz, dcomplex *rho_wz)
Definition bl1_dotsv3.c:290
dcomplex bl1_z0(void)
Definition bl1_constants.c:133

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotsv3(), bl1_zscals(), BLIS1_CONJUGATE, i, n_run, upsilon1, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofz_var4().