libflame revision_anchor
Functions
FLA_Tevd_v_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Tevd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg)
 
FLA_Error FLA_Tevd_v_ops_var2 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opd_var2 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opc_var2 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opz_var2 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
 

Function Documentation

◆ FLA_Tevd_v_opc_var2()

FLA_Error FLA_Tevd_v_opc_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_R,
int  rs_R,
int  cs_R,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
416{
418
419 return FLA_SUCCESS;
420}
int i
Definition bl1_axmyv2.c:145

References i.

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_opd_var2()

FLA_Error FLA_Tevd_v_opd_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_R,
int  rs_R,
int  cs_R,
double buff_W,
int  rs_W,
int  cs_W,
double buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
181{
182 dcomplex one = bl1_z1();
183 double rone = bl1_d1();
184 double rzero = bl1_d0();
185
186 dcomplex* G;
187 double* d1;
188 double* e1;
189 int r_val;
190 int done;
191 int m_G_sweep_max;
192 int ij_begin;
193 int ijTL, ijBR;
194 int m_A11;
195 int n_iter_perf;
196 int n_U_apply;
198 int n_deflations;
199 int n_iter_prev;
201
202 // Initialize our completion flag.
203 done = FALSE;
204
205 // Initialize a counter that holds the maximum number of rows of G
206 // that we would need to initialize for the next sweep.
207 m_G_sweep_max = m_A - 1;
208
209 // Initialize a counter for the total number of iterations performed.
210 n_iter_prev = 0;
211
212 // Initialize R to identity.
214 buff_R, rs_R, cs_R );
215
216 // Iterate until the matrix has completely deflated.
217 for ( total_deflations = 0; done != TRUE; )
218 {
219
220 // Initialize G to contain only identity rotations.
222 n_G,
223 &one,
224 buff_G, rs_G, cs_G );
225
226 // Keep track of the maximum number of iterations performed in the
227 // current sweep. This is used when applying the sweep's Givens
228 // rotations.
230
231 // Perform a sweep: Move through the matrix and perform a tridiagonal
232 // EVD on each non-zero submatrix that is encountered. During the
233 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
234 for ( ij_begin = 0; ij_begin < m_A; )
235 {
236
237#ifdef PRINTF
238if ( ij_begin == 0 )
239printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
240#endif
241
242 // Search for the first submatrix along the diagonal that is
243 // bounded by zeroes (or endpoints of the matrix). If no
244 // submatrix is found (ie: if the entire subdiagonal is zero
245 // then FLA_FAILURE is returned. This function also inspects
246 // subdiagonal elements for proximity to zero. If a given
247 // element is close enough to zero, then it is deemed
248 // converged and manually set to zero.
250 ij_begin,
251 buff_d, inc_d,
252 buff_e, inc_e,
253 &ijTL,
254 &ijBR );
255
256 // Verify that a submatrix was found. If one was not found,
257 // then we are done with the current sweep. Furthermore, if
258 // a submatrix was not found AND we began our search at the
259 // beginning of the matrix (ie: ij_begin == 0), then the
260 // matrix has completely deflated and so we are done with
261 // Francis step iteration.
262 if ( r_val == FLA_FAILURE )
263 {
264 if ( ij_begin == 0 )
265 {
266#ifdef PRINTF
267printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" );
268printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" );
269#endif
270 done = TRUE;
271 }
272
273 // Break out of the current sweep so we can apply the last
274 // remaining Givens rotations.
275 break;
276 }
277
278 // If we got this far, then:
279 // (a) ijTL refers to the index of the first non-zero
280 // subdiagonal along the diagonal, and
281 // (b) ijBR refers to either:
282 // - the first zero element that occurs after ijTL, or
283 // - the the last diagonal element.
284 // Note that ijTL and ijBR also correspond to the first and
285 // last diagonal elements of the submatrix of interest. Thus,
286 // we may compute the dimension of this submatrix as:
287 m_A11 = ijBR - ijTL + 1;
288
289#ifdef PRINTF
290printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin );
291printf( "FLA_Tevd_v_opd_var2: ijTL = %d\n", ijTL );
292printf( "FLA_Tevd_v_opd_var2: ijBR = %d\n", ijBR );
293printf( "FLA_Tevd_v_opd_var2: m_A11 = %d\n", m_A11 );
294#endif
295
296 // Adjust ij_begin, which gets us ready for the next subproblem, if
297 // there is one.
298 ij_begin = ijBR + 1;
299
300 // Index to the submatrices upon which we will operate.
301 d1 = buff_d + ijTL * inc_d;
302 e1 = buff_e + ijTL * inc_e;
303 G = buff_G + ijTL * rs_G;
304
305 // Search for a batch of eigenvalues, recursing on deflated
306 // subproblems whenever a split occurs. Iteration continues
307 // as long as
308 // (a) there is still matrix left to operate on, and
309 // (b) the number of iterations performed in this batch is
310 // less than n_G.
311 // If/when either of the two above conditions fails to hold,
312 // the function returns.
314 n_G,
315 ijTL,
316 d1, inc_d,
317 e1, inc_e,
318 G, rs_G, cs_G,
319 &n_iter_perf );
320
321 // Record the number of deflations that we observed.
323
324 // Update the maximum number of iterations performed in the
325 // current sweep.
327
328#ifdef PRINTF
329printf( "FLA_Tevd_v_opd_var2: deflations observed = %d\n", n_deflations );
330printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations );
331printf( "FLA_Tevd_v_opd_var2: num iterations = %d\n", n_iter_perf );
332#endif
333
334 // Store the most recent value of ijBR in m_G_sweep_max.
335 // When the sweep is done, this value will contain the minimum
336 // number of rows of G we can apply and safely include all
337 // non-identity rotations that were computed during the
338 // eigenvalue searches.
340
341 // Make sure we haven't exceeded our maximum iteration count.
342 if ( n_iter_prev >= m_A * n_iter_max )
343 {
344#ifdef PRINTF
345printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
346#endif
347 FLA_Abort();
348 //return FLA_FAILURE;
349 }
350 }
351
352 // The sweep is complete. Now we must apply the Givens rotations
353 // that were accumulated during the sweep.
354
355
356 // Recall that the number of columns of U to which we apply
357 // rotations is one more than the number of rotations.
359
360 // Apply the Givens rotations that were computed as part of
361 // the previous batch of iterations.
362 //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
363 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
365 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
366 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
367 m_U,
368 n_U_apply,
370 buff_G, rs_G, cs_G,
371 buff_R, rs_R, cs_R,
372 b_alg );
373
374#ifdef PRINTF
375printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
376#endif
377
378 // Increment the total number of iterations previously performed.
380 }
381
382 // Copy the contents of Q to temporary storage.
384 m_A,
385 m_A,
386 buff_U, rs_U, cs_U,
387 buff_W, rs_W, cs_W );
388
389
390 // Multiply Q by R, overwriting U.
393 m_A,
394 m_A,
395 m_A,
396 &rone,
397 ( double* )buff_W, rs_W, cs_W,
398 buff_R, rs_R, cs_R,
399 &rzero,
400 ( double* )buff_U, rs_U, cs_U );
401
402 return n_iter_prev;
403}
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:135
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition FLA_Tevd_iteracc_v_opt_var1.c:26
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition FLA_Tevd_find_submatrix.c:28
void FLA_Abort(void)
Definition FLA_Error.c:248
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_copymt.c:148
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_gemm.c:274
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
double bl1_d0(void)
Definition bl1_constants.c:118
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition bl1_ident.c:32
double bl1_d1(void)
Definition bl1_constants.c:54
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), FLA_Tevd_iteracc_v_opd_var1(), and i.

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_ops_var2()

FLA_Error FLA_Tevd_v_ops_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_R,
int  rs_R,
int  cs_R,
float buff_W,
int  rs_W,
int  cs_W,
float buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
162{
164
165 return FLA_SUCCESS;
166}

References i.

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_opt_var2()

FLA_Error FLA_Tevd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  R,
FLA_Obj  W,
FLA_Obj  U,
dim_t  b_alg 
)
14{
16 FLA_Datatype datatype;
17 int m_A, m_U, n_G;
18 int inc_d;
19 int inc_e;
20 int rs_G, cs_G;
21 int rs_R, cs_R;
22 int rs_U, cs_U;
23 int rs_W, cs_W;
24
25 datatype = FLA_Obj_datatype( U );
26
28 m_U = FLA_Obj_length( U );
29 n_G = FLA_Obj_width( G );
30
33
36
39
42
45
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
51 float* buff_d = FLA_FLOAT_PTR( d );
52 float* buff_e = FLA_FLOAT_PTR( e );
54 float* buff_R = FLA_FLOAT_PTR( R );
55 float* buff_W = FLA_FLOAT_PTR( W );
56 float* buff_U = FLA_FLOAT_PTR( U );
57
59 m_U,
60 n_G,
68 b_alg );
69
70 break;
71 }
72
73 case FLA_DOUBLE:
74 {
75 double* buff_d = FLA_DOUBLE_PTR( d );
76 double* buff_e = FLA_DOUBLE_PTR( e );
78 double* buff_R = FLA_DOUBLE_PTR( R );
79 double* buff_W = FLA_DOUBLE_PTR( W );
80 double* buff_U = FLA_DOUBLE_PTR( U );
81
83 m_U,
84 n_G,
92 b_alg );
93
94 break;
95 }
96
97 case FLA_COMPLEX:
98 {
99 float* buff_d = FLA_FLOAT_PTR( d );
100 float* buff_e = FLA_FLOAT_PTR( e );
102 float* buff_R = FLA_FLOAT_PTR( R );
105
107 m_U,
108 n_G,
110 buff_d, inc_d,
111 buff_e, inc_e,
112 buff_G, rs_G, cs_G,
113 buff_R, rs_R, cs_R,
114 buff_W, rs_W, cs_W,
115 buff_U, rs_U, cs_U,
116 b_alg );
117
118 break;
119 }
120
122 {
123 double* buff_d = FLA_DOUBLE_PTR( d );
124 double* buff_e = FLA_DOUBLE_PTR( e );
126 double* buff_R = FLA_DOUBLE_PTR( R );
129
131 m_U,
132 n_G,
134 buff_d, inc_d,
135 buff_e, inc_e,
136 buff_G, rs_G, cs_G,
137 buff_R, rs_R, cs_R,
138 buff_W, rs_W, cs_W,
139 buff_U, rs_U, cs_U,
140 b_alg );
141
142 break;
143 }
144 }
145
146 return r_val;
147}
FLA_Error FLA_Tevd_v_opc_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:405
FLA_Error FLA_Tevd_v_ops_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:151
FLA_Error FLA_Tevd_v_opd_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:170
FLA_Error FLA_Tevd_v_opz_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:424
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition FLA_Query.c:137
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Error
Definition FLA_type_defs.h:47
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:133

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_ops_var2(), FLA_Tevd_v_opz_var2(), and i.

Referenced by FLA_Hevd_lv_unb_var2().

◆ FLA_Tevd_v_opz_var2()

FLA_Error FLA_Tevd_v_opz_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_R,
int  rs_R,
int  cs_R,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
435{
436 dcomplex one = bl1_z1();
437 double rone = bl1_d1();
438 double rzero = bl1_d0();
439
440 dcomplex* G;
441 double* d1;
442 double* e1;
443 int r_val;
444 int done;
445 int m_G_sweep_max;
446 int ij_begin;
447 int ijTL, ijBR;
448 int m_A11;
449 int n_iter_perf;
450 int n_U_apply;
452 int n_deflations;
453 int n_iter_prev;
455
456 // Initialize our completion flag.
457 done = FALSE;
458
459 // Initialize a counter that holds the maximum number of rows of G
460 // that we would need to initialize for the next sweep.
461 m_G_sweep_max = m_A - 1;
462
463 // Initialize a counter for the total number of iterations performed.
464 n_iter_prev = 0;
465
466 // Initialize R to identity.
468 buff_R, rs_R, cs_R );
469
470 // Iterate until the matrix has completely deflated.
471 for ( total_deflations = 0; done != TRUE; )
472 {
473
474 // Initialize G to contain only identity rotations.
476 n_G,
477 &one,
478 buff_G, rs_G, cs_G );
479
480 // Keep track of the maximum number of iterations performed in the
481 // current sweep. This is used when applying the sweep's Givens
482 // rotations.
484
485 // Perform a sweep: Move through the matrix and perform a tridiagonal
486 // EVD on each non-zero submatrix that is encountered. During the
487 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
488 for ( ij_begin = 0; ij_begin < m_A; )
489 {
490
491#ifdef PRINTF
492if ( ij_begin == 0 )
493printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
494#endif
495
496 // Search for the first submatrix along the diagonal that is
497 // bounded by zeroes (or endpoints of the matrix). If no
498 // submatrix is found (ie: if the entire subdiagonal is zero
499 // then FLA_FAILURE is returned. This function also inspects
500 // subdiagonal elements for proximity to zero. If a given
501 // element is close enough to zero, then it is deemed
502 // converged and manually set to zero.
504 ij_begin,
505 buff_d, inc_d,
506 buff_e, inc_e,
507 &ijTL,
508 &ijBR );
509
510 // Verify that a submatrix was found. If one was not found,
511 // then we are done with the current sweep. Furthermore, if
512 // a submatrix was not found AND we began our search at the
513 // beginning of the matrix (ie: ij_begin == 0), then the
514 // matrix has completely deflated and so we are done with
515 // Francis step iteration.
516 if ( r_val == FLA_FAILURE )
517 {
518 if ( ij_begin == 0 )
519 {
520#ifdef PRINTF
521printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" );
522printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" );
523#endif
524 done = TRUE;
525 }
526
527 // Break out of the current sweep so we can apply the last
528 // remaining Givens rotations.
529 break;
530 }
531
532 // If we got this far, then:
533 // (a) ijTL refers to the index of the first non-zero
534 // subdiagonal along the diagonal, and
535 // (b) ijBR refers to either:
536 // - the first zero element that occurs after ijTL, or
537 // - the the last diagonal element.
538 // Note that ijTL and ijBR also correspond to the first and
539 // last diagonal elements of the submatrix of interest. Thus,
540 // we may compute the dimension of this submatrix as:
541 m_A11 = ijBR - ijTL + 1;
542
543#ifdef PRINTF
544printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin );
545printf( "FLA_Tevd_v_opz_var2: ijTL = %d\n", ijTL );
546printf( "FLA_Tevd_v_opz_var2: ijBR = %d\n", ijBR );
547printf( "FLA_Tevd_v_opz_var2: m_A11 = %d\n", m_A11 );
548#endif
549
550 // Adjust ij_begin, which gets us ready for the next subproblem, if
551 // there is one.
552 ij_begin = ijBR + 1;
553
554 // Index to the submatrices upon which we will operate.
555 d1 = buff_d + ijTL * inc_d;
556 e1 = buff_e + ijTL * inc_e;
557 G = buff_G + ijTL * rs_G;
558
559 // Search for a batch of eigenvalues, recursing on deflated
560 // subproblems whenever a split occurs. Iteration continues
561 // as long as
562 // (a) there is still matrix left to operate on, and
563 // (b) the number of iterations performed in this batch is
564 // less than n_G.
565 // If/when either of the two above conditions fails to hold,
566 // the function returns.
568 n_G,
569 ijTL,
570 d1, inc_d,
571 e1, inc_e,
572 G, rs_G, cs_G,
573 &n_iter_perf );
574
575 // Record the number of deflations that we observed.
577
578 // Update the maximum number of iterations performed in the
579 // current sweep.
581
582#ifdef PRINTF
583printf( "FLA_Tevd_v_opz_var2: deflations observed = %d\n", n_deflations );
584printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations );
585printf( "FLA_Tevd_v_opz_var2: num iterations = %d\n", n_iter_perf );
586#endif
587
588 // Store the most recent value of ijBR in m_G_sweep_max.
589 // When the sweep is done, this value will contain the minimum
590 // number of rows of G we can apply and safely include all
591 // non-identity rotations that were computed during the
592 // eigenvalue searches.
594
595 // Make sure we haven't exceeded our maximum iteration count.
596 if ( n_iter_prev >= m_A * n_iter_max )
597 {
598#ifdef PRINTF
599printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
600#endif
601 FLA_Abort();
602 //return FLA_FAILURE;
603 }
604 }
605
606 // The sweep is complete. Now we must apply the Givens rotations
607 // that were accumulated during the sweep.
608
609
610 // Recall that the number of columns of U to which we apply
611 // rotations is one more than the number of rotations.
613
614 // Apply the Givens rotations that were computed as part of
615 // the previous batch of iterations.
616 //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
617 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
619 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
620 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
621 m_U,
622 n_U_apply,
624 buff_G, rs_G, cs_G,
625 buff_R, rs_R, cs_R,
626 b_alg );
627
628#ifdef PRINTF
629printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
630#endif
631
632 // Increment the total number of iterations previously performed.
634 }
635
636 // Copy the contents of Q to temporary storage.
638 m_A,
639 m_A,
640 buff_U, rs_U, cs_U,
641 buff_W, rs_W, cs_W );
642
643
644 // Multiply Q by R, overwriting U.
647 2*m_A,
648 m_A,
649 m_A,
650 &rone,
651 ( double* )buff_W, rs_W, 2*cs_W,
652 buff_R, rs_R, cs_R,
653 &rzero,
654 ( double* )buff_U, rs_U, 2*cs_U );
655
656 return n_iter_prev;
657}
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:286

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zcopymt(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), FLA_Tevd_iteracc_v_opd_var1(), and i.

Referenced by FLA_Tevd_v_opt_var2().