libflame revision_anchor
Functions
FLA_Bsvd_v_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Bsvd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj RG, FLA_Obj RH, FLA_Obj W, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 

Function Documentation

◆ FLA_Bsvd_v_opc_var2()

FLA_Error FLA_Bsvd_v_opc_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float buff_RG,
int  rs_RG,
int  cs_RG,
float buff_RH,
int  rs_RH,
int  cs_RH,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
544{
546
547 return FLA_SUCCESS;
548}
int i
Definition bl1_axmyv2.c:145

References i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opd_var2()

FLA_Error FLA_Bsvd_v_opd_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double buff_RG,
int  rs_RG,
int  cs_RG,
double buff_RH,
int  rs_RH,
int  cs_RH,
double buff_W,
int  rs_W,
int  cs_W,
double buff_U,
int  rs_U,
int  cs_U,
double buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
229{
230 dcomplex one = bl1_z1();
231 double rone = bl1_d1();
232 double rzero = bl1_d0();
233
234 int maxitr = 6;
235
236 double eps;
237 double tolmul;
238 double tol;
239 double thresh;
240
241 dcomplex* G;
242 dcomplex* H;
243 double* d1;
244 double* e1;
245 int r_val;
246 int done;
247 int m_GH_sweep_max;
248 int ij_begin;
249 int ijTL, ijBR;
250 int m_A11;
251 int n_iter_perf;
252 int n_UV_apply;
254 int n_deflations;
255 int n_iter_prev;
257
258 // Compute some convergence constants.
260 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
262 tolmul,
263 maxitr,
264 buff_d, inc_d,
265 buff_e, inc_e,
266 &tol,
267 &thresh );
268
269 // Initialize our completion flag.
270 done = FALSE;
271
272 // Initialize a counter that holds the maximum number of rows of G
273 // and H that we would need to initialize for the next sweep.
275
276 // Initialize a counter for the total number of iterations performed.
277 n_iter_prev = 0;
278
279 // Initialize RG and RH to identity.
281 buff_RG, rs_RG, cs_RG );
283 buff_RH, rs_RH, cs_RH );
284
285 // Iterate until the matrix has completely deflated.
286 for ( total_deflations = 0; done != TRUE; )
287 {
288
289 // Initialize G and H to contain only identity rotations.
291 n_GH,
292 &one,
293 buff_G, rs_G, cs_G );
295 n_GH,
296 &one,
297 buff_H, rs_H, cs_H );
298
299 // Keep track of the maximum number of iterations performed in the
300 // current sweep. This is used when applying the sweep's Givens
301 // rotations.
303
304 // Perform a sweep: Move through the matrix and perform a bidiagonal
305 // SVD on each non-zero submatrix that is encountered. During the
306 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
307 for ( ij_begin = 0; ij_begin < min_m_n; )
308 {
309
310#ifdef PRINTF
311if ( ij_begin == 0 )
312printf( "FLA_Bsvd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
313#endif
314
315 // Search for the first submatrix along the diagonal that is
316 // bounded by zeroes (or endpoints of the matrix). If no
317 // submatrix is found (ie: if the entire superdiagonal is zero
318 // then FLA_FAILURE is returned. This function also inspects
319 // superdiagonal elements for proximity to zero. If a given
320 // element is close enough to zero, then it is deemed
321 // converged and manually set to zero.
323 ij_begin,
324 buff_d, inc_d,
325 buff_e, inc_e,
326 &ijTL,
327 &ijBR );
328
329 // Verify that a submatrix was found. If one was not found,
330 // then we are done with the current sweep. Furthermore, if
331 // a submatrix was not found AND we began our search at the
332 // beginning of the matrix (ie: ij_begin == 0), then the
333 // matrix has completely deflated and so we are done with
334 // Francis step iteration.
335 if ( r_val == FLA_FAILURE )
336 {
337 if ( ij_begin == 0 )
338 {
339#ifdef PRINTF
340printf( "FLA_Bsvd_v_opd_var2: superdiagonal is completely zero.\n" );
341printf( "FLA_Bsvd_v_opd_var2: Francis iteration is done!\n" );
342#endif
343 done = TRUE;
344 }
345
346 // Break out of the current sweep so we can apply the last
347 // remaining Givens rotations.
348 break;
349 }
350
351 // If we got this far, then:
352 // (a) ijTL refers to the index of the first non-zero
353 // superdiagonal along the diagonal, and
354 // (b) ijBR refers to either:
355 // - the first zero element that occurs after ijTL, or
356 // - the the last diagonal element.
357 // Note that ijTL and ijBR also correspond to the first and
358 // last diagonal elements of the submatrix of interest. Thus,
359 // we may compute the dimension of this submatrix as:
360 m_A11 = ijBR - ijTL + 1;
361
362#ifdef PRINTF
363printf( "FLA_Bsvd_v_opd_var2: ij_begin = %d\n", ij_begin );
364printf( "FLA_Bsvd_v_opd_var2: ijTL = %d\n", ijTL );
365printf( "FLA_Bsvd_v_opd_var2: ijBR = %d\n", ijBR );
366printf( "FLA_Bsvd_v_opd_var2: m_A11 = %d\n", m_A11 );
367#endif
368
369 // Adjust ij_begin, which gets us ready for the next submatrix
370 // search in the current sweep.
371 ij_begin = ijBR + 1;
372
373 // Index to the submatrices upon which we will operate.
374 d1 = buff_d + ijTL * inc_d;
375 e1 = buff_e + ijTL * inc_e;
376 G = buff_G + ijTL * rs_G;
377 H = buff_H + ijTL * rs_H;
378
379 // Search for a batch of singular values, recursing on deflated
380 // subproblems whenever possible. A new singular value search is
381 // performed as long as
382 // (a) there is still matrix left to operate on, and
383 // (b) the number of iterations performed in this batch is
384 // less than n_G.
385 // If/when either of the two above conditions fails to hold,
386 // the function returns.
388 n_GH,
389 ijTL,
390 tol,
391 thresh,
392 d1, inc_d,
393 e1, inc_e,
394 G, rs_G, cs_G,
395 H, rs_H, cs_H,
396 &n_iter_perf );
397
398 // Record the number of deflations that occurred.
400
401 // Update the maximum number of iterations performed in the
402 // current sweep.
404
405#ifdef PRINTF
406printf( "FLA_Bsvd_v_opd_var2: deflations observed = %d\n", n_deflations );
407printf( "FLA_Bsvd_v_opd_var2: total deflations observed = %d\n", total_deflations );
408printf( "FLA_Bsvd_v_opd_var2: num iterations performed = %d\n", n_iter_perf );
409#endif
410
411 // Store the most recent value of ijBR in m_G_sweep_max.
412 // When the sweep is done, this value will contain the minimum
413 // number of rows of G and H we can apply and safely include all
414 // non-identity rotations that were computed during the
415 // singular value searches.
417
418 }
419
420 // The sweep is complete. Now we must apply the Givens rotations
421 // that were accumulated during the sweep.
422
423 // Recall that the number of columns of U and V to which we apply
424 // rotations is one more than the number of rotations.
426
427
428#ifdef PRINTF
429printf( "FLA_Bsvd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
430printf( "FLA_Bsvd_v_opd_var2: m_U = %d\n", m_U );
431printf( "FLA_Bsvd_v_opd_var2: m_V = %d\n", m_V );
432printf( "FLA_Bsvd_v_opd_var2: napp= %d\n", n_UV_apply );
433#endif
434
435 // Apply the Givens rotations. Note that we only apply k sets of
436 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
437 // apply to n_UV_apply columns of U and V since this is the most we
438 // need to touch given the most recent value stored to m_GH_sweep_max.
439 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
441 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
442 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
443 min_m_n,
446 buff_G, rs_G, cs_G,
448 b_alg );
449 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
451 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
452 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
453 min_m_n,
456 buff_H, rs_H, cs_H,
458 b_alg );
459
460 // Increment the total number of iterations previously performed.
462
463#ifdef PRINTF
464printf( "FLA_Bsvd_v_opd_var2: total number of iterations performed: %d\n", n_iter_prev );
465#endif
466 }
467
468 // Copy the contents of Q to temporary storage.
470 m_U,
471 m_V,
472 buff_U, rs_U, cs_U,
473 buff_W, rs_W, cs_W );
474// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
475
476 // Multiply U by R, overwriting U.
479 m_U,
480 m_V,
481 m_V,
482 &rone,
483 ( double* )buff_W, rs_W, cs_W,
485 &rzero,
486 ( double* )buff_U, rs_U, cs_U );
487
489 m_V,
490 m_V,
491 buff_V, rs_V, cs_V,
492 buff_W, rs_W, cs_W );
493
494 // Multiply V by R, overwriting V.
497 m_V,
498 m_V,
499 m_V,
500 &rone,
501 ( double* )buff_W, rs_W, cs_W,
503 &rzero,
504 ( double* )buff_V, rs_V, cs_V );
505
506 // Make all the singular values positive.
507 {
508 int i;
509 double minus_one = bl1_dm1();
510
511 for ( i = 0; i < min_m_n; ++i )
512 {
513 if ( buff_d[ (i )*inc_d ] < rzero )
514 {
515 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
516
517 // Scale the right singular vectors.
519 m_V,
520 &minus_one,
521 buff_V + (i )*cs_V, rs_V );
522 }
523 }
524 }
525
526 return n_iter_prev;
527}
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:135
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition FLA_Bsvd_compute_tol_thresh.c:137
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition FLA_Bsvd_find_submatrix.c:73
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition FLA_Bsvd_iteracc_v_opt_var1.c:176
double FLA_Mach_params_opd(FLA_Machval machval)
Definition FLA_Mach_params.c:74
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_copymt.c:148
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_gemm.c:274
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
double bl1_dm1(void)
Definition bl1_constants.c:182
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
double bl1_d0(void)
Definition bl1_constants.c:118
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition bl1_ident.c:32
double bl1_d1(void)
Definition bl1_constants.c:54
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_ops_var2()

FLA_Error FLA_Bsvd_v_ops_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float buff_RG,
int  rs_RG,
int  cs_RG,
float buff_RH,
int  rs_RH,
int  cs_RH,
float buff_W,
int  rs_W,
int  cs_W,
float buff_U,
int  rs_U,
int  cs_U,
float buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
206{
208
209 return FLA_SUCCESS;
210}

References i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opt_var2()

FLA_Error FLA_Bsvd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  RG,
FLA_Obj  RH,
FLA_Obj  W,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
14{
16 FLA_Datatype datatype;
17 int m_U, m_V, n_GH;
18 int inc_d;
19 int inc_e;
20 int rs_G, cs_G;
21 int rs_H, cs_H;
22 int rs_RG, cs_RG;
23 int rs_RH, cs_RH;
24 int rs_W, cs_W;
25 int rs_U, cs_U;
26 int rs_V, cs_V;
27
28 datatype = FLA_Obj_datatype( U );
29
30 m_U = FLA_Obj_length( U );
31 m_V = FLA_Obj_length( V );
32 n_GH = FLA_Obj_width( G );
33
36
39
42
45
48
51
54
57
58
59 switch ( datatype )
60 {
61 case FLA_FLOAT:
62 {
63 float* buff_d = FLA_FLOAT_PTR( d );
64 float* buff_e = FLA_FLOAT_PTR( e );
67 float* buff_RG = FLA_FLOAT_PTR( RG );
68 float* buff_RH = FLA_FLOAT_PTR( RH );
69 float* buff_W = FLA_FLOAT_PTR( W );
70 float* buff_U = FLA_FLOAT_PTR( U );
71 float* buff_V = FLA_FLOAT_PTR( V );
72
74 m_U,
75 m_V,
76 n_GH,
87 b_alg );
88
89 break;
90 }
91
92 case FLA_DOUBLE:
93 {
94 double* buff_d = FLA_DOUBLE_PTR( d );
95 double* buff_e = FLA_DOUBLE_PTR( e );
98 double* buff_RG = FLA_DOUBLE_PTR( RG );
99 double* buff_RH = FLA_DOUBLE_PTR( RH );
100 double* buff_W = FLA_DOUBLE_PTR( W );
101 double* buff_U = FLA_DOUBLE_PTR( U );
102 double* buff_V = FLA_DOUBLE_PTR( V );
103
105 m_U,
106 m_V,
107 n_GH,
109 buff_d, inc_d,
110 buff_e, inc_e,
111 buff_G, rs_G, cs_G,
112 buff_H, rs_H, cs_H,
115 buff_W, rs_W, cs_W,
116 buff_U, rs_U, cs_U,
117 buff_V, rs_V, cs_V,
118 b_alg );
119
120 break;
121 }
122
123 case FLA_COMPLEX:
124 {
125 float* buff_d = FLA_FLOAT_PTR( d );
126 float* buff_e = FLA_FLOAT_PTR( e );
129 float* buff_RG = FLA_FLOAT_PTR( RG );
130 float* buff_RH = FLA_FLOAT_PTR( RH );
134
136 m_U,
137 m_V,
138 n_GH,
140 buff_d, inc_d,
141 buff_e, inc_e,
142 buff_G, rs_G, cs_G,
143 buff_H, rs_H, cs_H,
146 buff_W, rs_W, cs_W,
147 buff_U, rs_U, cs_U,
148 buff_V, rs_V, cs_V,
149 b_alg );
150
151 break;
152 }
153
155 {
156 double* buff_d = FLA_DOUBLE_PTR( d );
157 double* buff_e = FLA_DOUBLE_PTR( e );
160 double* buff_RG = FLA_DOUBLE_PTR( RG );
161 double* buff_RH = FLA_DOUBLE_PTR( RH );
165
167 m_U,
168 m_V,
169 n_GH,
171 buff_d, inc_d,
172 buff_e, inc_e,
173 buff_G, rs_G, cs_G,
174 buff_H, rs_H, cs_H,
177 buff_W, rs_W, cs_W,
178 buff_U, rs_U, cs_U,
179 buff_V, rs_V, cs_V,
180 b_alg );
181
182 break;
183 }
184 }
185
186 return r_val;
187}
FLA_Error FLA_Bsvd_v_opd_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:214
FLA_Error FLA_Bsvd_v_ops_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:191
FLA_Error FLA_Bsvd_v_opc_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:529
FLA_Error FLA_Bsvd_v_opz_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:550
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Error
Definition FLA_type_defs.h:47
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:133

References FLA_Bsvd_v_opc_var2(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_ops_var2(), FLA_Bsvd_v_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_Svd_uv_unb_var2().

◆ FLA_Bsvd_v_opz_var2()

FLA_Error FLA_Bsvd_v_opz_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double buff_RG,
int  rs_RG,
int  cs_RG,
double buff_RH,
int  rs_RH,
int  cs_RH,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
565{
566 dcomplex one = bl1_z1();
567 double rone = bl1_d1();
568 double rzero = bl1_d0();
569
570 int maxitr = 6;
571
572 double eps;
573 double tolmul;
574 double tol;
575 double thresh;
576
577 dcomplex* G;
578 dcomplex* H;
579 double* d1;
580 double* e1;
581 int r_val;
582 int done;
583 int m_GH_sweep_max;
584 int ij_begin;
585 int ijTL, ijBR;
586 int m_A11;
587 int n_iter_perf;
588 int n_UV_apply;
590 int n_deflations;
591 int n_iter_prev;
593
594 // Compute some convergence constants.
596 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
598 tolmul,
599 maxitr,
600 buff_d, inc_d,
601 buff_e, inc_e,
602 &tol,
603 &thresh );
604
605 // Initialize our completion flag.
606 done = FALSE;
607
608 // Initialize a counter that holds the maximum number of rows of G
609 // and H that we would need to initialize for the next sweep.
611
612 // Initialize a counter for the total number of iterations performed.
613 n_iter_prev = 0;
614
615 // Initialize RG and RH to identity.
617 buff_RG, rs_RG, cs_RG );
619 buff_RH, rs_RH, cs_RH );
620
621 // Iterate until the matrix has completely deflated.
622 for ( total_deflations = 0; done != TRUE; )
623 {
624
625 // Initialize G and H to contain only identity rotations.
627 n_GH,
628 &one,
629 buff_G, rs_G, cs_G );
631 n_GH,
632 &one,
633 buff_H, rs_H, cs_H );
634
635 // Keep track of the maximum number of iterations performed in the
636 // current sweep. This is used when applying the sweep's Givens
637 // rotations.
639
640 // Perform a sweep: Move through the matrix and perform a bidiagonal
641 // SVD on each non-zero submatrix that is encountered. During the
642 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
643 for ( ij_begin = 0; ij_begin < min_m_n; )
644 {
645
646#ifdef PRINTF
647if ( ij_begin == 0 )
648printf( "FLA_Bsvd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
649#endif
650
651 // Search for the first submatrix along the diagonal that is
652 // bounded by zeroes (or endpoints of the matrix). If no
653 // submatrix is found (ie: if the entire superdiagonal is zero
654 // then FLA_FAILURE is returned. This function also inspects
655 // superdiagonal elements for proximity to zero. If a given
656 // element is close enough to zero, then it is deemed
657 // converged and manually set to zero.
659 ij_begin,
660 buff_d, inc_d,
661 buff_e, inc_e,
662 &ijTL,
663 &ijBR );
664
665 // Verify that a submatrix was found. If one was not found,
666 // then we are done with the current sweep. Furthermore, if
667 // a submatrix was not found AND we began our search at the
668 // beginning of the matrix (ie: ij_begin == 0), then the
669 // matrix has completely deflated and so we are done with
670 // Francis step iteration.
671 if ( r_val == FLA_FAILURE )
672 {
673 if ( ij_begin == 0 )
674 {
675#ifdef PRINTF
676printf( "FLA_Bsvd_v_opz_var2: superdiagonal is completely zero.\n" );
677printf( "FLA_Bsvd_v_opz_var2: Francis iteration is done!\n" );
678#endif
679 done = TRUE;
680 }
681
682 // Break out of the current sweep so we can apply the last
683 // remaining Givens rotations.
684 break;
685 }
686
687 // If we got this far, then:
688 // (a) ijTL refers to the index of the first non-zero
689 // superdiagonal along the diagonal, and
690 // (b) ijBR refers to either:
691 // - the first zero element that occurs after ijTL, or
692 // - the the last diagonal element.
693 // Note that ijTL and ijBR also correspond to the first and
694 // last diagonal elements of the submatrix of interest. Thus,
695 // we may compute the dimension of this submatrix as:
696 m_A11 = ijBR - ijTL + 1;
697
698#ifdef PRINTF
699printf( "FLA_Bsvd_v_opz_var2: ij_begin = %d\n", ij_begin );
700printf( "FLA_Bsvd_v_opz_var2: ijTL = %d\n", ijTL );
701printf( "FLA_Bsvd_v_opz_var2: ijBR = %d\n", ijBR );
702printf( "FLA_Bsvd_v_opz_var2: m_A11 = %d\n", m_A11 );
703#endif
704
705 // Adjust ij_begin, which gets us ready for the next submatrix
706 // search in the current sweep.
707 ij_begin = ijBR + 1;
708
709 // Index to the submatrices upon which we will operate.
710 d1 = buff_d + ijTL * inc_d;
711 e1 = buff_e + ijTL * inc_e;
712 G = buff_G + ijTL * rs_G;
713 H = buff_H + ijTL * rs_H;
714
715 // Search for a batch of singular values, recursing on deflated
716 // subproblems whenever possible. A new singular value search is
717 // performed as long as
718 // (a) there is still matrix left to operate on, and
719 // (b) the number of iterations performed in this batch is
720 // less than n_G.
721 // If/when either of the two above conditions fails to hold,
722 // the function returns.
724 n_GH,
725 ijTL,
726 tol,
727 thresh,
728 d1, inc_d,
729 e1, inc_e,
730 G, rs_G, cs_G,
731 H, rs_H, cs_H,
732 &n_iter_perf );
733
734 // Record the number of deflations that occurred.
736
737 // Update the maximum number of iterations performed in the
738 // current sweep.
740
741#ifdef PRINTF
742printf( "FLA_Bsvd_v_opz_var2: deflations observed = %d\n", n_deflations );
743printf( "FLA_Bsvd_v_opz_var2: total deflations observed = %d\n", total_deflations );
744printf( "FLA_Bsvd_v_opz_var2: num iterations performed = %d\n", n_iter_perf );
745#endif
746
747 // Store the most recent value of ijBR in m_G_sweep_max.
748 // When the sweep is done, this value will contain the minimum
749 // number of rows of G and H we can apply and safely include all
750 // non-identity rotations that were computed during the
751 // singular value searches.
753
754 }
755
756 // The sweep is complete. Now we must apply the Givens rotations
757 // that were accumulated during the sweep.
758
759 // Recall that the number of columns of U and V to which we apply
760 // rotations is one more than the number of rotations.
762
763
764#ifdef PRINTF
765printf( "FLA_Bsvd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
766printf( "FLA_Bsvd_v_opz_var2: m_U = %d\n", m_U );
767printf( "FLA_Bsvd_v_opz_var2: m_V = %d\n", m_V );
768printf( "FLA_Bsvd_v_opz_var2: napp= %d\n", n_UV_apply );
769#endif
770
771 // Apply the Givens rotations. Note that we only apply k sets of
772 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
773 // apply to n_UV_apply columns of U and V since this is the most we
774 // need to touch given the most recent value stored to m_GH_sweep_max.
775 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
777 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
778 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
779 min_m_n,
782 buff_G, rs_G, cs_G,
784 b_alg );
785 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
787 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
788 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
789 min_m_n,
792 buff_H, rs_H, cs_H,
794 b_alg );
795
796 // Increment the total number of iterations previously performed.
798
799#ifdef PRINTF
800printf( "FLA_Bsvd_v_opz_var2: total number of iterations performed: %d\n", n_iter_prev );
801#endif
802 }
803
804 // Copy the contents of Q to temporary storage.
806 m_U,
807 m_V,
808 buff_U, rs_U, cs_U,
809 buff_W, rs_W, cs_W );
810// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
811
812 // Multiply U by R, overwriting U.
815 2*m_U,
816 m_V,
817 m_V,
818 &rone,
819 ( double* )buff_W, rs_W, 2*cs_W,
821 &rzero,
822 ( double* )buff_U, rs_U, 2*cs_U );
823
825 m_V,
826 m_V,
827 buff_V, rs_V, cs_V,
828 buff_W, rs_W, cs_W );
829
830 // Multiply V by R, overwriting V.
833 2*m_V,
834 m_V,
835 m_V,
836 &rone,
837 ( double* )buff_W, rs_W, 2*cs_W,
839 &rzero,
840 ( double* )buff_V, rs_V, 2*cs_V );
841
842 // Make all the singular values positive.
843 {
844 int i;
845 double minus_one = bl1_dm1();
846
847 for ( i = 0; i < min_m_n; ++i )
848 {
849 if ( buff_d[ (i )*inc_d ] < rzero )
850 {
851 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
852
853 // Scale the right singular vectors.
855 m_V,
856 &minus_one,
857 buff_V + (i )*cs_V, rs_V );
858 }
859 }
860 }
861
862 return n_iter_prev;
863}
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:286
void bl1_zdscalv(conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:61

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_z1(), bl1_zcopymt(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().