libflame revision_anchor
Functions
FLA_Tevd_v.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Tevd_compute_scaling_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sigma)
 
FLA_Error FLA_Tevd_compute_scaling_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sigma)
 
FLA_Error FLA_Tevd_find_submatrix_ops (int m_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Tevd_find_submatrix_opd (int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Tevd_find_perfshift_ops (int m_d, int m_l, float *buff_d, int inc_d, float *buff_e, int inc_e, float *buff_l, int inc_l, int *buff_lstat, int inc_lstat, float *buff_pu, int inc_pu, int *ij_shift)
 
FLA_Error FLA_Tevd_find_perfshift_opd (int m_d, int m_l, double *buff_d, int inc_d, double *buff_e, int inc_e, double *buff_l, int inc_l, int *buff_lstat, int inc_lstat, double *buff_pu, int inc_pu, int *ij_shift)
 
FLA_Error FLA_Norm1_tridiag (FLA_Obj d, FLA_Obj e, FLA_Obj norm)
 
FLA_Error FLA_Norm1_tridiag_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *norm)
 
FLA_Error FLA_Norm1_tridiag_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *norm)
 
FLA_Error FLA_Tevd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U, dim_t b_alg)
 
FLA_Error FLA_Tevd_v_ops_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opd_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opc_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opz_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg)
 
FLA_Error FLA_Tevd_v_ops_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opd_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opc_var2 (int m_A, int m_U, int n_G, int n_G_extra, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opz_var2 (int m_A, int m_U, int n_G, int n_G_extra, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
 

Function Documentation

◆ FLA_Norm1_tridiag()

FLA_Error FLA_Norm1_tridiag ( FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  norm 
)
14{
15 FLA_Datatype datatype;
16 int m_A;
17 int inc_d;
18 int inc_e;
19
20 datatype = FLA_Obj_datatype( d );
21
23
26
27
28 switch ( datatype )
29 {
30 case FLA_FLOAT:
31 {
32 float* buff_d = FLA_FLOAT_PTR( d );
33 float* buff_e = FLA_FLOAT_PTR( e );
34 float* buff_norm = FLA_FLOAT_PTR( norm );
35
39 buff_norm );
40
41 break;
42 }
43
44 case FLA_DOUBLE:
45 {
46 double* buff_d = FLA_DOUBLE_PTR( d );
47 double* buff_e = FLA_DOUBLE_PTR( e );
48 double* buff_norm = FLA_DOUBLE_PTR( norm );
49
53 buff_norm );
54
55 break;
56 }
57 }
58
59 return FLA_SUCCESS;
60}
FLA_Error FLA_Norm1_tridiag_opd(int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *norm)
Definition FLA_Norm1_tridiag.c:111
FLA_Error FLA_Norm1_tridiag_ops(int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *norm)
Definition FLA_Norm1_tridiag.c:64
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition FLA_Query.c:137
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
int i
Definition bl1_axmyv2.c:145

References FLA_Norm1_tridiag_opd(), FLA_Norm1_tridiag_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), and i.

◆ FLA_Norm1_tridiag_opd()

FLA_Error FLA_Norm1_tridiag_opd ( int  m_A,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
double norm 
)
115{
116 double* d = buff_d;
117 double* e = buff_e;
118 double nm;
119 int i;
120
121 if ( m_A == 1 )
122 {
123 nm = fabs( *d );
124 }
125 else
126 {
127 double d_first = d[ (0 )*inc_d ];
128 double e_first = e[ (0 )*inc_e ];
129 double e_last = e[ (m_A-2)*inc_e ];
130 double d_last = d[ (m_A-1)*inc_d ];
131
132 // Record the maximum of the absolute row/column sums for the
133 // first and last row/columns.
134 nm = max( fabs( d_first ) + fabs( e_first ),
135 fabs( e_last ) + fabs( d_last ) );
136
137 for ( i = 1; i < m_A - 2; ++i )
138 {
139 double e0 = e[ (i-1)*inc_e ];
140 double e1 = e[ (i )*inc_e ];
141 double d1 = d[ (i )*inc_d ];
142
143 // Update nm with the absolute row/column sum for the ith
144 // row/column.
145 nm = max( nm, fabs( e0 ) +
146 fabs( d1 ) +
147 fabs( e1 ) );
148 }
149 }
150
151 *norm = nm;
152
153 return FLA_SUCCESS;
154}

References i.

Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_opd().

◆ FLA_Norm1_tridiag_ops()

FLA_Error FLA_Norm1_tridiag_ops ( int  m_A,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
float norm 
)
68{
69 float* d = buff_d;
70 float* e = buff_e;
71 float nm;
72 int i;
73
74 if ( m_A == 1 )
75 {
76 nm = fabs( *d );
77 }
78 else
79 {
80 float d_first = d[ (0 )*inc_d ];
81 float e_first = e[ (0 )*inc_e ];
82 float e_last = e[ (m_A-2)*inc_e ];
83 float d_last = d[ (m_A-1)*inc_d ];
84
85 // Record the maximum of the absolute row/column sums for the
86 // first and last row/columns.
87 nm = max( fabs( d_first ) + fabs( e_first ),
88 fabs( e_last ) + fabs( d_last ) );
89
90 for ( i = 1; i < m_A - 2; ++i )
91 {
92 float e0 = e[ (i-1)*inc_e ];
93 float e1 = e[ (i )*inc_e ];
94 float d1 = d[ (i )*inc_d ];
95
96 // Update nm with the absolute row/column sum for the ith
97 // row/column.
98 nm = max( nm, fabs( e0 ) +
99 fabs( d1 ) +
100 fabs( e1 ) );
101 }
102 }
103
104 *norm = nm;
105
106 return FLA_SUCCESS;
107}

References i.

Referenced by FLA_Norm1_tridiag(), and FLA_Tevd_compute_scaling_ops().

◆ FLA_Tevd_compute_scaling_opd()

FLA_Error FLA_Tevd_compute_scaling_opd ( int  m_A,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
double sigma 
)
63{
64 double one = bl1_d1();
65 double three = 3.0;
66 double norm;
67 double eps2;
68 double safmin;
69 double safmax;
70 double ssfmin;
71 double ssfmax;
72
73 // Query some constants.
76 safmax = one / safmin;
77
78 // Compute the acceptable range for the 1-norm;
79 ssfmax = sqrt( safmax ) / three;
80 ssfmin = sqrt( safmin ) / eps2;
81
82 // Compute the 1-norm of the tridiagonal matrix.
86 &norm );
87
88 // Compute sigma accordingly if norm is outside of the range.
89 if ( norm > ssfmax )
90 {
91 *sigma = ssfmax / norm;
92 }
93 else if ( norm < ssfmin )
94 {
95 *sigma = ssfmin / norm;
96 }
97 else
98 {
99 *sigma = one;
100 }
101
102 return FLA_SUCCESS;
103}
double FLA_Mach_params_opd(FLA_Machval machval)
Definition FLA_Mach_params.c:74
double bl1_d1(void)
Definition bl1_constants.c:54

References bl1_d1(), FLA_Mach_params_opd(), FLA_Norm1_tridiag_opd(), and i.

◆ FLA_Tevd_compute_scaling_ops()

FLA_Error FLA_Tevd_compute_scaling_ops ( int  m_A,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
float sigma 
)
17{
18 float one = bl1_s1();
19 float three = 3.0F;
20 float norm;
21 float eps2;
22 float safmin;
23 float safmax;
24 float ssfmin;
25 float ssfmax;
26
27 // Query some constants.
30 safmax = one / safmin;
31
32 // Compute the acceptable range for the 1-norm;
33 ssfmax = sqrt( safmax ) / three;
34 ssfmin = sqrt( safmin ) / eps2;
35
36 // Compute the 1-norm of the tridiagonal matrix.
40 &norm );
41
42 // Compute sigma accordingly if norm is outside of the range.
43 if ( norm > ssfmax )
44 {
45 *sigma = ssfmax / norm;
46 }
47 else if ( norm < ssfmin )
48 {
49 *sigma = ssfmin / norm;
50 }
51 else
52 {
53 *sigma = one;
54 }
55
56 return FLA_SUCCESS;
57}
float FLA_Mach_params_ops(FLA_Machval machval)
Definition FLA_Mach_params.c:47
float bl1_s1(void)
Definition bl1_constants.c:47

References bl1_s1(), FLA_Mach_params_ops(), FLA_Norm1_tridiag_ops(), and i.

◆ FLA_Tevd_find_perfshift_opd()

FLA_Error FLA_Tevd_find_perfshift_opd ( int  m_d,
int  m_l,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
double buff_l,
int  inc_l,
int buff_lstat,
int  inc_lstat,
double buff_pu,
int  inc_pu,
int ij_shift 
)
38{
39 double* d1p;
40 double* e1p;
41 double* d2p;
42 double wilkshift;
43 int i;
44 int ij_cand;
45 double dist_cand;
46 double pshift_cand;
47
48 d1p = buff_d + (m_d-2)*inc_d;
49 e1p = buff_e + (m_d-2)*inc_e;
50 d2p = buff_d + (m_d-1)*inc_d;
51
52 if ( *buff_ls == -1 )
53 {
54 *ij_shift = -1;
55 return FLA_FAILURE;
56 }
57
59 *e1p,
60 *d2p,
61 &wilkshift );
62
63/*
64 // If we have shifted here previously, use a Wilkinson shfit.
65 prev_shift = buff_pu[ (m_d-1)*inc_pu ];
66
67 if ( prev_shift != 0.0 )
68 {
69 // *shift = prev_shift;
70 *shift = wilkshift;
71 return FLA_SUCCESS;
72 }
73*/
74 ij_cand = -1;
75
76 // Find an available (unused) shift.
77 for ( i = 0; i < m_l; ++i )
78 {
79 int* status = buff_ls + (i )*inc_ls;
80
81 if ( *status == 0 )
82 {
83 double* lambda1 = buff_l + (i )*inc_l;
84 ij_cand = i;
87 }
88 }
89
90 if ( ij_cand == -1 )
91 {
92 *ij_shift = -1;
93 *buff_ls = -1;
94 return FLA_FAILURE;
95 }
96
97 // Now try to find a shift closer to wilkshift than the
98 // first one we found.
99 for ( i = 0; i < m_l; ++i )
100 {
101 double* lambda1 = buff_l + (i )*inc_l;
102 int* status = buff_ls + (i )*inc_ls;
103 double dist = fabs( wilkshift - *lambda1 );
104
105 if ( *status == 1 ) continue;
106
107 if ( dist < dist_cand )
108 {
109 ij_cand = i;
111 dist_cand = dist;
112 }
113 }
114
115 *ij_shift = ij_cand;
116
117 return FLA_SUCCESS;
118}
FLA_Error FLA_Wilkshift_tridiag_opd(double delta1, double epsilon, double delta2, double *kappa)
Definition FLA_Wilkshift_tridiag.c:155

References FLA_Wilkshift_tridiag_opd(), and i.

Referenced by FLA_Tevd_eigval_v_opd_var3().

◆ FLA_Tevd_find_perfshift_ops()

FLA_Error FLA_Tevd_find_perfshift_ops ( int  m_d,
int  m_l,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
float buff_l,
int  inc_l,
int buff_lstat,
int  inc_lstat,
float buff_pu,
int  inc_pu,
int ij_shift 
)

References i.

◆ FLA_Tevd_find_submatrix_opd()

FLA_Error FLA_Tevd_find_submatrix_opd ( int  m_A,
int  ij_begin,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
int ijTL,
int ijBR 
)
34{
35 double rzero = bl1_d0();
36 double eps;
37 int ij_tl;
38 int ij_br;
39
40 // Initialize some numerical constants.
42
43 // Search for the first non-zero subdiagonal element starting at
44 // the index specified by ij_begin.
45 for ( ij_tl = ij_begin; ij_tl < m_A - 1; ++ij_tl )
46 {
47 double* d1 = buff_d + (ij_tl )*inc_d;
48 double* d2 = buff_d + (ij_tl+1)*inc_d;
49 double* e1 = buff_e + (ij_tl )*inc_e;
50 double abs_e1 = fabs( *e1 );
51
52 // If we encounter a non-zero subdiagonal element that is close
53 // enough to zero, set it to zero.
54 if ( abs_e1 != rzero )
55 {
56 if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) *
57 sqrt( fabs( *d2 ) ) )
58 {
59#ifdef PRINTF
60printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" );
61printf( " d[%3d] = %22.19e\n", ij_tl, *d1 );
62printf( " e[%3d] d[%3d] = %22.19e %22.19e\n", ij_tl, ij_tl+1, *e1, *d2 );
63#endif
64 *e1 = rzero;
65 }
66 }
67
68 // If we find a non-zero element, record it and break out of this
69 // loop.
70 if ( *e1 != rzero )
71 {
72#ifdef PRINTF
73printf( "FLA_Tevd_find_submatrix_opd: found non-zero subdiagonal element\n" );
74printf( " e[%3d] = %22.19e\n", ij_tl, *e1 );
75#endif
76 *ijTL = ij_tl;
77 break;
78 }
79 }
80
81 // If ij_tl was incremented all the way up to m_A - 1, then we didn't
82 // find any non-zeros.
83 if ( ij_tl == m_A - 1 )
84 {
85#ifdef PRINTF
86printf( "FLA_Tevd_find_submatrix_opd: no submatrices found.\n" );
87#endif
88 return FLA_FAILURE;
89 }
90
91 // If we've gotten this far, then a non-zero subdiagonal element was
92 // found. Now we must walk the remaining portion of the subdiagonal
93 // to find the first zero element, or if one is not found, we simply
94 // use the last element of the subdiagonal.
95 for ( ij_br = ij_tl; ij_br < m_A - 1; ++ij_br )
96 {
97 double* d1 = buff_d + (ij_br )*inc_d;
98 double* d2 = buff_d + (ij_br+1)*inc_d;
99 double* e1 = buff_e + (ij_br )*inc_e;
100 double abs_e1 = fabs( *e1 );
101
102 // If we encounter a non-zero subdiagonal element that is close
103 // enough to zero, set it to zero.
104 if ( abs_e1 != rzero )
105 {
106 if ( abs_e1 <= eps * sqrt( fabs( *d1 ) ) *
107 sqrt( fabs( *d2 ) ) )
108 {
109#ifdef PRINTF
110printf( "FLA_Tevd_find_submatrix_opd: nudging non-zero subdiagonal element (e1) to zero.\n" );
111printf( " d[%3d] = %22.19e\n", ij_br, *d1 );
112printf( " e[%3d] d[%3d] = %22.19e %22.19e\n", ij_br, ij_br+1, *e1, *d2 );
113#endif
114 *e1 = rzero;
115 }
116 }
117
118 // If we find a zero element, record it and break out of this
119 // loop.
120 if ( *e1 == rzero )
121 {
122#ifdef PRINTF
123printf( "FLA_Tevd_find_submatrix_opd: found zero subdiagonal element\n" );
124printf( " e[%3d] = %22.19e\n", ij_br, *e1 );
125#endif
126 break;
127 }
128 }
129
130 // If a zero element was found, then ij_br should hold the index of
131 // that element. If a zero element was not found, then ij_br should
132 // hold m_A - 1. Either way, we save the value and return success.
133 *ijBR = ij_br;
134
135 return FLA_SUCCESS;
136}
double bl1_d0(void)
Definition bl1_constants.c:118

◆ FLA_Tevd_find_submatrix_ops()

FLA_Error FLA_Tevd_find_submatrix_ops ( int  m_A,
int  ij_begin,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
int ijTL,
int ijBR 
)

◆ FLA_Tevd_v_opc_var1()

FLA_Error FLA_Tevd_v_opc_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
374{
376
377 return FLA_SUCCESS;
378}

References i.

Referenced by FLA_Tevd_v_opt_var1().

◆ FLA_Tevd_v_opc_var2()

FLA_Error FLA_Tevd_v_opc_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_R,
int  rs_R,
int  cs_R,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
416{
418
419 return FLA_SUCCESS;
420}

References i.

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_opd_var1()

FLA_Error FLA_Tevd_v_opd_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
153{
154 dcomplex one = bl1_z1();
155
156 dcomplex* G;
157 double* d1;
158 double* e1;
159 int r_val;
160 int done;
161 int m_G_sweep_max;
162 int ij_begin;
163 int ijTL, ijBR;
164 int m_A11;
165 int n_iter_perf;
166 int n_U_apply;
168 int n_deflations;
169 int n_iter_prev;
171
172 // Initialize our completion flag.
173 done = FALSE;
174
175 // Initialize a counter that holds the maximum number of rows of G
176 // that we would need to initialize for the next sweep.
177 m_G_sweep_max = m_A - 1;
178
179 // Initialize a counter for the total number of iterations performed.
180 n_iter_prev = 0;
181
182 // Iterate until the matrix has completely deflated.
183 for ( total_deflations = 0; done != TRUE; )
184 {
185 // Initialize G to contain only identity rotations.
187 n_G,
188 &one,
189 buff_G, rs_G, cs_G );
190
191 // Keep track of the maximum number of iterations performed in the
192 // current sweep. This is used when applying the sweep's Givens
193 // rotations.
195
196 // Perform a sweep: Move through the matrix and perform a tridiagonal
197 // EVD on each non-zero submatrix that is encountered. During the
198 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
199 for ( ij_begin = 0; ij_begin < m_A; )
200 {
201
202#ifdef PRINTF
203if ( ij_begin == 0 )
204printf( "FLA_Tevd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
205#endif
206
207 // Search for the first submatrix along the diagonal that is
208 // bounded by zeroes (or endpoints of the matrix). If no
209 // submatrix is found (ie: if the entire subdiagonal is zero
210 // then FLA_FAILURE is returned. This function also inspects
211 // subdiagonal elements for proximity to zero. If a given
212 // element is close enough to zero, then it is deemed
213 // converged and manually set to zero.
215 ij_begin,
216 buff_d, inc_d,
217 buff_e, inc_e,
218 &ijTL,
219 &ijBR );
220
221 // Verify that a submatrix was found. If one was not found,
222 // then we are done with the current sweep. Furthermore, if
223 // a submatrix was not found AND we began our search at the
224 // beginning of the matrix (ie: ij_begin == 0), then the
225 // matrix has completely deflated and so we are done with
226 // Francis step iteration.
227 if ( r_val == FLA_FAILURE )
228 {
229 if ( ij_begin == 0 )
230 {
231#ifdef PRINTF
232printf( "FLA_Tevd_v_opd_var1: subdiagonal is completely zero.\n" );
233printf( "FLA_Tevd_v_opd_var1: Francis iteration is done!\n" );
234#endif
235 done = TRUE;
236 }
237
238 // Break out of the current sweep so we can apply the last
239 // remaining Givens rotations.
240 break;
241 }
242
243 // If we got this far, then:
244 // (a) ijTL refers to the index of the first non-zero
245 // subdiagonal along the diagonal, and
246 // (b) ijBR refers to either:
247 // - the first zero element that occurs after ijTL, or
248 // - the the last diagonal element.
249 // Note that ijTL and ijBR also correspond to the first and
250 // last diagonal elements of the submatrix of interest. Thus,
251 // we may compute the dimension of this submatrix as:
252 m_A11 = ijBR - ijTL + 1;
253
254#ifdef PRINTF
255printf( "FLA_Tevd_v_opd_var1: ij_begin = %d\n", ij_begin );
256printf( "FLA_Tevd_v_opd_var1: ijTL = %d\n", ijTL );
257printf( "FLA_Tevd_v_opd_var1: ijBR = %d\n", ijBR );
258printf( "FLA_Tevd_v_opd_var1: m_A11 = %d\n", m_A11 );
259#endif
260
261 // Adjust ij_begin, which gets us ready for the next submatrix
262 // search in the current sweep.
263 ij_begin = ijBR + 1;
264
265 // Index to the submatrices upon which we will operate.
266 d1 = buff_d + ijTL * inc_d;
267 e1 = buff_e + ijTL * inc_e;
268 G = buff_G + ijTL * rs_G;
269
270 // Search for a batch of eigenvalues, recursing on deflated
271 // subproblems whenever a split occurs. Iteration continues
272 // as long as:
273 // (a) there is still matrix left to operate on, and
274 // (b) the number of iterations performed in this batch is
275 // less than n_G.
276 // If/when either of the two above conditions fails to hold,
277 // the function returns.
279 n_G,
280 ijTL,
281 d1, inc_d,
282 e1, inc_e,
283 G, rs_G, cs_G,
284 &n_iter_perf );
285
286 // Record the number of deflations that were observed.
288
289 // Update the maximum number of iterations performed in the
290 // current sweep.
292
293#ifdef PRINTF
294printf( "FLA_Tevd_v_opd_var1: deflations observed = %d\n", n_deflations );
295printf( "FLA_Tevd_v_opd_var1: total deflations observed = %d\n", total_deflations );
296printf( "FLA_Tevd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
297#endif
298
299 // Store the most recent value of ijBR in m_G_sweep_max.
300 // When the sweep is done, this value will contain the minimum
301 // number of rows of G we can apply and safely include all
302 // non-identity rotations that were computed during the
303 // eigenvalue searches.
305
306 // Make sure we haven't exceeded our maximum iteration count.
307 if ( n_iter_prev >= m_A * n_iter_max )
308 {
309#ifdef PRINTF
310printf( "FLA_Tevd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
311#endif
312 FLA_Abort();
313 //return FLA_FAILURE;
314 }
315 }
316
317 // The sweep is complete. Now we must apply the Givens rotations
318 // that were accumulated during the sweep.
319
320 // Recall that the number of columns of U to which we apply
321 // rotations is one more than the number of rotations.
323
324#ifdef PRINTF
325printf( "FLA_Tevd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
326#endif
327
328 // Apply the Givens rotations. Note that we optimize the scope
329 // of the operation in two ways:
330 // 1. We only apply k sets of Givens rotations, where
331 // k = n_iter_perf_sweep_max. We could simply always apply
332 // n_G sets of rotations since G is initialized to contain
333 // identity rotations in every element, but we do this to
334 // save a little bit of time.
335 // 2. We only apply to the first n_U_apply columns of A since
336 // this is the most we need to touch given the ijBR index
337 // bound of the last submatrix found in the previous sweep.
338 // Similar to above, we could simply always perform the
339 // application on all m_A columns of A, but instead we apply
340 // only to the first n_U_apply columns to save time.
341 //FLA_Apply_G_rf_bld_var1( n_iter_perf_sweep_max,
342 //FLA_Apply_G_rf_bld_var2( n_iter_perf_sweep_max,
344 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
345 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
346 m_U,
347 n_U_apply,
348 buff_G, rs_G, cs_G,
349 buff_U, rs_U, cs_U,
350 b_alg );
351
352
353
354 // Increment the total number of iterations previously performed.
356
357#ifdef PRINTF
358printf( "FLA_Tevd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
359#endif
360 }
361
362 return n_iter_prev;
363}
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition FLA_Tevd_iteracc_v_opt_var1.c:26
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition FLA_Tevd_find_submatrix.c:28
void FLA_Abort(void)
Definition FLA_Error.c:248
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78
Definition blis_type_defs.h:138

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Tevd_find_submatrix_opd(), FLA_Tevd_iteracc_v_opd_var1(), and i.

Referenced by FLA_Tevd_v_opt_var1().

◆ FLA_Tevd_v_opd_var2()

FLA_Error FLA_Tevd_v_opd_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_R,
int  rs_R,
int  cs_R,
double buff_W,
int  rs_W,
int  cs_W,
double buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
181{
182 dcomplex one = bl1_z1();
183 double rone = bl1_d1();
184 double rzero = bl1_d0();
185
186 dcomplex* G;
187 double* d1;
188 double* e1;
189 int r_val;
190 int done;
191 int m_G_sweep_max;
192 int ij_begin;
193 int ijTL, ijBR;
194 int m_A11;
195 int n_iter_perf;
196 int n_U_apply;
198 int n_deflations;
199 int n_iter_prev;
201
202 // Initialize our completion flag.
203 done = FALSE;
204
205 // Initialize a counter that holds the maximum number of rows of G
206 // that we would need to initialize for the next sweep.
207 m_G_sweep_max = m_A - 1;
208
209 // Initialize a counter for the total number of iterations performed.
210 n_iter_prev = 0;
211
212 // Initialize R to identity.
214 buff_R, rs_R, cs_R );
215
216 // Iterate until the matrix has completely deflated.
217 for ( total_deflations = 0; done != TRUE; )
218 {
219
220 // Initialize G to contain only identity rotations.
222 n_G,
223 &one,
224 buff_G, rs_G, cs_G );
225
226 // Keep track of the maximum number of iterations performed in the
227 // current sweep. This is used when applying the sweep's Givens
228 // rotations.
230
231 // Perform a sweep: Move through the matrix and perform a tridiagonal
232 // EVD on each non-zero submatrix that is encountered. During the
233 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
234 for ( ij_begin = 0; ij_begin < m_A; )
235 {
236
237#ifdef PRINTF
238if ( ij_begin == 0 )
239printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
240#endif
241
242 // Search for the first submatrix along the diagonal that is
243 // bounded by zeroes (or endpoints of the matrix). If no
244 // submatrix is found (ie: if the entire subdiagonal is zero
245 // then FLA_FAILURE is returned. This function also inspects
246 // subdiagonal elements for proximity to zero. If a given
247 // element is close enough to zero, then it is deemed
248 // converged and manually set to zero.
250 ij_begin,
251 buff_d, inc_d,
252 buff_e, inc_e,
253 &ijTL,
254 &ijBR );
255
256 // Verify that a submatrix was found. If one was not found,
257 // then we are done with the current sweep. Furthermore, if
258 // a submatrix was not found AND we began our search at the
259 // beginning of the matrix (ie: ij_begin == 0), then the
260 // matrix has completely deflated and so we are done with
261 // Francis step iteration.
262 if ( r_val == FLA_FAILURE )
263 {
264 if ( ij_begin == 0 )
265 {
266#ifdef PRINTF
267printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" );
268printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" );
269#endif
270 done = TRUE;
271 }
272
273 // Break out of the current sweep so we can apply the last
274 // remaining Givens rotations.
275 break;
276 }
277
278 // If we got this far, then:
279 // (a) ijTL refers to the index of the first non-zero
280 // subdiagonal along the diagonal, and
281 // (b) ijBR refers to either:
282 // - the first zero element that occurs after ijTL, or
283 // - the the last diagonal element.
284 // Note that ijTL and ijBR also correspond to the first and
285 // last diagonal elements of the submatrix of interest. Thus,
286 // we may compute the dimension of this submatrix as:
287 m_A11 = ijBR - ijTL + 1;
288
289#ifdef PRINTF
290printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin );
291printf( "FLA_Tevd_v_opd_var2: ijTL = %d\n", ijTL );
292printf( "FLA_Tevd_v_opd_var2: ijBR = %d\n", ijBR );
293printf( "FLA_Tevd_v_opd_var2: m_A11 = %d\n", m_A11 );
294#endif
295
296 // Adjust ij_begin, which gets us ready for the next subproblem, if
297 // there is one.
298 ij_begin = ijBR + 1;
299
300 // Index to the submatrices upon which we will operate.
301 d1 = buff_d + ijTL * inc_d;
302 e1 = buff_e + ijTL * inc_e;
303 G = buff_G + ijTL * rs_G;
304
305 // Search for a batch of eigenvalues, recursing on deflated
306 // subproblems whenever a split occurs. Iteration continues
307 // as long as
308 // (a) there is still matrix left to operate on, and
309 // (b) the number of iterations performed in this batch is
310 // less than n_G.
311 // If/when either of the two above conditions fails to hold,
312 // the function returns.
314 n_G,
315 ijTL,
316 d1, inc_d,
317 e1, inc_e,
318 G, rs_G, cs_G,
319 &n_iter_perf );
320
321 // Record the number of deflations that we observed.
323
324 // Update the maximum number of iterations performed in the
325 // current sweep.
327
328#ifdef PRINTF
329printf( "FLA_Tevd_v_opd_var2: deflations observed = %d\n", n_deflations );
330printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations );
331printf( "FLA_Tevd_v_opd_var2: num iterations = %d\n", n_iter_perf );
332#endif
333
334 // Store the most recent value of ijBR in m_G_sweep_max.
335 // When the sweep is done, this value will contain the minimum
336 // number of rows of G we can apply and safely include all
337 // non-identity rotations that were computed during the
338 // eigenvalue searches.
340
341 // Make sure we haven't exceeded our maximum iteration count.
342 if ( n_iter_prev >= m_A * n_iter_max )
343 {
344#ifdef PRINTF
345printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
346#endif
347 FLA_Abort();
348 //return FLA_FAILURE;
349 }
350 }
351
352 // The sweep is complete. Now we must apply the Givens rotations
353 // that were accumulated during the sweep.
354
355
356 // Recall that the number of columns of U to which we apply
357 // rotations is one more than the number of rotations.
359
360 // Apply the Givens rotations that were computed as part of
361 // the previous batch of iterations.
362 //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
363 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
365 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
366 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
367 m_U,
368 n_U_apply,
370 buff_G, rs_G, cs_G,
371 buff_R, rs_R, cs_R,
372 b_alg );
373
374#ifdef PRINTF
375printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
376#endif
377
378 // Increment the total number of iterations previously performed.
380 }
381
382 // Copy the contents of Q to temporary storage.
384 m_A,
385 m_A,
386 buff_U, rs_U, cs_U,
387 buff_W, rs_W, cs_W );
388
389
390 // Multiply Q by R, overwriting U.
393 m_A,
394 m_A,
395 m_A,
396 &rone,
397 ( double* )buff_W, rs_W, cs_W,
398 buff_R, rs_R, cs_R,
399 &rzero,
400 ( double* )buff_U, rs_U, cs_U );
401
402 return n_iter_prev;
403}
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:135
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_copymt.c:148
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_gemm.c:274
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition bl1_ident.c:32
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), FLA_Tevd_iteracc_v_opd_var1(), and i.

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_ops_var1()

FLA_Error FLA_Tevd_v_ops_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
136{
138
139 return FLA_SUCCESS;
140}

References i.

Referenced by FLA_Tevd_v_opt_var1().

◆ FLA_Tevd_v_ops_var2()

FLA_Error FLA_Tevd_v_ops_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float buff_R,
int  rs_R,
int  cs_R,
float buff_W,
int  rs_W,
int  cs_W,
float buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
162{
164
165 return FLA_SUCCESS;
166}

References i.

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_opt_var1()

FLA_Error FLA_Tevd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  U,
dim_t  b_alg 
)
14{
16 FLA_Datatype datatype;
17 int m_A, m_U, n_G;
18 int inc_d;
19 int inc_e;
20 int rs_G, cs_G;
21 int rs_U, cs_U;
22
23 datatype = FLA_Obj_datatype( U );
24
26 m_U = FLA_Obj_length( U );
27 n_G = FLA_Obj_width( G );
28
31
34
37
38
39 switch ( datatype )
40 {
41 case FLA_FLOAT:
42 {
43 float* buff_d = FLA_FLOAT_PTR( d );
44 float* buff_e = FLA_FLOAT_PTR( e );
46 float* buff_U = FLA_FLOAT_PTR( U );
47
49 m_U,
50 n_G,
56 b_alg );
57
58 break;
59 }
60
61 case FLA_DOUBLE:
62 {
63 double* buff_d = FLA_DOUBLE_PTR( d );
64 double* buff_e = FLA_DOUBLE_PTR( e );
66 double* buff_U = FLA_DOUBLE_PTR( U );
67
69 m_U,
70 n_G,
76 b_alg );
77
78 break;
79 }
80
81 case FLA_COMPLEX:
82 {
83 float* buff_d = FLA_FLOAT_PTR( d );
84 float* buff_e = FLA_FLOAT_PTR( e );
87
89 m_U,
90 n_G,
96 b_alg );
97
98 break;
99 }
100
102 {
103 double* buff_d = FLA_DOUBLE_PTR( d );
104 double* buff_e = FLA_DOUBLE_PTR( e );
107
109 m_U,
110 n_G,
112 buff_d, inc_d,
113 buff_e, inc_e,
114 buff_G, rs_G, cs_G,
115 buff_U, rs_U, cs_U,
116 b_alg );
117
118 break;
119 }
120 }
121
122 return r_val;
123}
FLA_Error FLA_Tevd_v_opz_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var1.c:380
FLA_Error FLA_Tevd_v_opd_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var1.c:144
FLA_Error FLA_Tevd_v_ops_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var1.c:127
FLA_Error FLA_Tevd_v_opc_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var1.c:365
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
int FLA_Error
Definition FLA_type_defs.h:47
Definition blis_type_defs.h:133

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var1(), FLA_Tevd_v_opd_var1(), FLA_Tevd_v_ops_var1(), FLA_Tevd_v_opz_var1(), and i.

Referenced by FLA_Hevd_lv_unb_var1().

◆ FLA_Tevd_v_opt_var2()

FLA_Error FLA_Tevd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  R,
FLA_Obj  W,
FLA_Obj  U,
dim_t  b_alg 
)
14{
16 FLA_Datatype datatype;
17 int m_A, m_U, n_G;
18 int inc_d;
19 int inc_e;
20 int rs_G, cs_G;
21 int rs_R, cs_R;
22 int rs_U, cs_U;
23 int rs_W, cs_W;
24
25 datatype = FLA_Obj_datatype( U );
26
28 m_U = FLA_Obj_length( U );
29 n_G = FLA_Obj_width( G );
30
33
36
39
42
45
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
51 float* buff_d = FLA_FLOAT_PTR( d );
52 float* buff_e = FLA_FLOAT_PTR( e );
54 float* buff_R = FLA_FLOAT_PTR( R );
55 float* buff_W = FLA_FLOAT_PTR( W );
56 float* buff_U = FLA_FLOAT_PTR( U );
57
59 m_U,
60 n_G,
68 b_alg );
69
70 break;
71 }
72
73 case FLA_DOUBLE:
74 {
75 double* buff_d = FLA_DOUBLE_PTR( d );
76 double* buff_e = FLA_DOUBLE_PTR( e );
78 double* buff_R = FLA_DOUBLE_PTR( R );
79 double* buff_W = FLA_DOUBLE_PTR( W );
80 double* buff_U = FLA_DOUBLE_PTR( U );
81
83 m_U,
84 n_G,
92 b_alg );
93
94 break;
95 }
96
97 case FLA_COMPLEX:
98 {
99 float* buff_d = FLA_FLOAT_PTR( d );
100 float* buff_e = FLA_FLOAT_PTR( e );
102 float* buff_R = FLA_FLOAT_PTR( R );
105
107 m_U,
108 n_G,
110 buff_d, inc_d,
111 buff_e, inc_e,
112 buff_G, rs_G, cs_G,
113 buff_R, rs_R, cs_R,
114 buff_W, rs_W, cs_W,
115 buff_U, rs_U, cs_U,
116 b_alg );
117
118 break;
119 }
120
122 {
123 double* buff_d = FLA_DOUBLE_PTR( d );
124 double* buff_e = FLA_DOUBLE_PTR( e );
126 double* buff_R = FLA_DOUBLE_PTR( R );
129
131 m_U,
132 n_G,
134 buff_d, inc_d,
135 buff_e, inc_e,
136 buff_G, rs_G, cs_G,
137 buff_R, rs_R, cs_R,
138 buff_W, rs_W, cs_W,
139 buff_U, rs_U, cs_U,
140 b_alg );
141
142 break;
143 }
144 }
145
146 return r_val;
147}
FLA_Error FLA_Tevd_v_opc_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:405
FLA_Error FLA_Tevd_v_ops_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:151
FLA_Error FLA_Tevd_v_opd_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:170
FLA_Error FLA_Tevd_v_opz_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition FLA_Tevd_v_opt_var2.c:424

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_ops_var2(), FLA_Tevd_v_opz_var2(), and i.

Referenced by FLA_Hevd_lv_unb_var2().

◆ FLA_Tevd_v_opz_var1()

FLA_Error FLA_Tevd_v_opz_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
389{
390 dcomplex one = bl1_z1();
391
392 dcomplex* G;
393 double* d1;
394 double* e1;
395 int r_val;
396 int done;
397 int m_G_sweep_max;
398 int ij_begin;
399 int ijTL, ijBR;
400 int m_A11;
401 int n_iter_perf;
402 int n_U_apply;
404 int n_deflations;
405 int n_iter_prev;
407
408 // Initialize our completion flag.
409 done = FALSE;
410
411 // Initialize a counter that holds the maximum number of rows of G
412 // that we would need to initialize for the next sweep.
413 m_G_sweep_max = m_A - 1;
414
415 // Initialize a counter for the total number of iterations performed.
416 n_iter_prev = 0;
417
418 // Iterate until the matrix has completely deflated.
419 for ( total_deflations = 0; done != TRUE; )
420 {
421
422 // Initialize G to contain only identity rotations.
424 n_G,
425 &one,
426 buff_G, rs_G, cs_G );
427
428 // Keep track of the maximum number of iterations performed in the
429 // current sweep. This is used when applying the sweep's Givens
430 // rotations.
432
433 // Perform a sweep: Move through the matrix and perform a tridiagonal
434 // EVD on each non-zero submatrix that is encountered. During the
435 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
436 for ( ij_begin = 0; ij_begin < m_A; )
437 {
438
439#ifdef PRINTF
440if ( ij_begin == 0 )
441printf( "FLA_Tevd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
442#endif
443
444 // Search for the first submatrix along the diagonal that is
445 // bounded by zeroes (or endpoints of the matrix). If no
446 // submatrix is found (ie: if the entire subdiagonal is zero
447 // then FLA_FAILURE is returned. This function also inspects
448 // subdiagonal elements for proximity to zero. If a given
449 // element is close enough to zero, then it is deemed
450 // converged and manually set to zero.
452 ij_begin,
453 buff_d, inc_d,
454 buff_e, inc_e,
455 &ijTL,
456 &ijBR );
457
458 // Verify that a submatrix was found. If one was not found,
459 // then we are done with the current sweep. Furthermore, if
460 // a submatrix was not found AND we began our search at the
461 // beginning of the matrix (ie: ij_begin == 0), then the
462 // matrix has completely deflated and so we are done with
463 // Francis step iteration.
464 if ( r_val == FLA_FAILURE )
465 {
466 if ( ij_begin == 0 )
467 {
468#ifdef PRINTF
469printf( "FLA_Tevd_v_opz_var1: subdiagonal is completely zero.\n" );
470printf( "FLA_Tevd_v_opz_var1: Francis iteration is done!\n" );
471#endif
472 done = TRUE;
473 }
474
475 // Break out of the current sweep so we can apply the last
476 // remaining Givens rotations.
477 break;
478 }
479
480 // If we got this far, then:
481 // (a) ijTL refers to the index of the first non-zero
482 // subdiagonal along the diagonal, and
483 // (b) ijBR refers to either:
484 // - the first zero element that occurs after ijTL, or
485 // - the the last diagonal element.
486 // Note that ijTL and ijBR also correspond to the first and
487 // last diagonal elements of the submatrix of interest. Thus,
488 // we may compute the dimension of this submatrix as:
489 m_A11 = ijBR - ijTL + 1;
490
491#ifdef PRINTF
492printf( "FLA_Tevd_v_opz_var1: ij_begin = %d\n", ij_begin );
493printf( "FLA_Tevd_v_opz_var1: ijTL = %d\n", ijTL );
494printf( "FLA_Tevd_v_opz_var1: ijBR = %d\n", ijBR );
495printf( "FLA_Tevd_v_opz_var1: m_A11 = %d\n", m_A11 );
496#endif
497
498 // Adjust ij_begin, which gets us ready for the next submatrix
499 // search in the current sweep.
500 ij_begin = ijBR + 1;
501
502 // Index to the submatrices upon which we will operate.
503 d1 = buff_d + ijTL * inc_d;
504 e1 = buff_e + ijTL * inc_e;
505 G = buff_G + ijTL * rs_G;
506
507 // Search for a batch of eigenvalues, recursing on deflated
508 // subproblems whenever a split occurs. Iteration continues
509 // as long as:
510 // (a) there is still matrix left to operate on, and
511 // (b) the number of iterations performed in this batch is
512 // less than n_G.
513 // If/when either of the two above conditions fails to hold,
514 // the function returns.
516 n_G,
517 ijTL,
518 d1, inc_d,
519 e1, inc_e,
520 G, rs_G, cs_G,
521 &n_iter_perf );
522
523 // Record the number of deflations that were observed.
525
526 // Update the maximum number of iterations performed in the
527 // current sweep.
529
530#ifdef PRINTF
531printf( "FLA_Tevd_v_opz_var1: deflations observed = %d\n", n_deflations );
532printf( "FLA_Tevd_v_opz_var1: total deflations observed = %d\n", total_deflations );
533printf( "FLA_Tevd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
534#endif
535
536 // Store the most recent value of ijBR in m_G_sweep_max.
537 // When the sweep is done, this value will contain the minimum
538 // number of rows of G we can apply and safely include all
539 // non-identity rotations that were computed during the
540 // eigenvalue searches.
542
543 // Make sure we haven't exceeded our maximum iteration count.
544 if ( n_iter_prev >= m_A * n_iter_max )
545 {
546#ifdef PRINTF
547printf( "FLA_Tevd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
548#endif
549 FLA_Abort();
550 //return FLA_FAILURE;
551 }
552 }
553
554 // The sweep is complete. Now we must apply the Givens rotations
555 // that were accumulated during the sweep.
556
557 // Recall that the number of columns of U to which we apply
558 // rotations is one more than the number of rotations.
560
561#ifdef PRINTF
562printf( "FLA_Tevd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
563#endif
564
565 // Apply the Givens rotations. Note that we optimize the scope
566 // of the operation in two ways:
567 // 1. We only apply k sets of Givens rotations, where
568 // k = n_iter_perf_sweep_max. We could simply always apply
569 // n_G sets of rotations since G is initialized to contain
570 // identity rotations in every element, but we do this to
571 // save a little bit of time.
572 // 2. We only apply to the first n_U_apply columns of A since
573 // this is the most we need to touch given the ijBR index
574 // bound of the last submatrix found in the previous sweep.
575 // Similar to above, we could simply always perform the
576 // application on all m_A columns of A, but instead we apply
577 // only to the first n_U_apply columns to save time.
578 //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
580 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
581 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
582 m_U,
583 n_U_apply,
584 buff_G, rs_G, cs_G,
585 buff_U, rs_U, cs_U,
586 b_alg );
587
588 // Increment the total number of iterations previously performed.
590
591#ifdef PRINTF
592printf( "FLA_Tevd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
593#endif
594 }
595
596 return n_iter_prev;
597}
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:186

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Tevd_find_submatrix_opd(), FLA_Tevd_iteracc_v_opd_var1(), and i.

Referenced by FLA_Tevd_v_opt_var1().

◆ FLA_Tevd_v_opz_var2()

FLA_Error FLA_Tevd_v_opz_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_G_extra,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double buff_R,
int  rs_R,
int  cs_R,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
435{
436 dcomplex one = bl1_z1();
437 double rone = bl1_d1();
438 double rzero = bl1_d0();
439
440 dcomplex* G;
441 double* d1;
442 double* e1;
443 int r_val;
444 int done;
445 int m_G_sweep_max;
446 int ij_begin;
447 int ijTL, ijBR;
448 int m_A11;
449 int n_iter_perf;
450 int n_U_apply;
452 int n_deflations;
453 int n_iter_prev;
455
456 // Initialize our completion flag.
457 done = FALSE;
458
459 // Initialize a counter that holds the maximum number of rows of G
460 // that we would need to initialize for the next sweep.
461 m_G_sweep_max = m_A - 1;
462
463 // Initialize a counter for the total number of iterations performed.
464 n_iter_prev = 0;
465
466 // Initialize R to identity.
468 buff_R, rs_R, cs_R );
469
470 // Iterate until the matrix has completely deflated.
471 for ( total_deflations = 0; done != TRUE; )
472 {
473
474 // Initialize G to contain only identity rotations.
476 n_G,
477 &one,
478 buff_G, rs_G, cs_G );
479
480 // Keep track of the maximum number of iterations performed in the
481 // current sweep. This is used when applying the sweep's Givens
482 // rotations.
484
485 // Perform a sweep: Move through the matrix and perform a tridiagonal
486 // EVD on each non-zero submatrix that is encountered. During the
487 // first time through, ijTL will be 0 and ijBR will be m_A - 1.
488 for ( ij_begin = 0; ij_begin < m_A; )
489 {
490
491#ifdef PRINTF
492if ( ij_begin == 0 )
493printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
494#endif
495
496 // Search for the first submatrix along the diagonal that is
497 // bounded by zeroes (or endpoints of the matrix). If no
498 // submatrix is found (ie: if the entire subdiagonal is zero
499 // then FLA_FAILURE is returned. This function also inspects
500 // subdiagonal elements for proximity to zero. If a given
501 // element is close enough to zero, then it is deemed
502 // converged and manually set to zero.
504 ij_begin,
505 buff_d, inc_d,
506 buff_e, inc_e,
507 &ijTL,
508 &ijBR );
509
510 // Verify that a submatrix was found. If one was not found,
511 // then we are done with the current sweep. Furthermore, if
512 // a submatrix was not found AND we began our search at the
513 // beginning of the matrix (ie: ij_begin == 0), then the
514 // matrix has completely deflated and so we are done with
515 // Francis step iteration.
516 if ( r_val == FLA_FAILURE )
517 {
518 if ( ij_begin == 0 )
519 {
520#ifdef PRINTF
521printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" );
522printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" );
523#endif
524 done = TRUE;
525 }
526
527 // Break out of the current sweep so we can apply the last
528 // remaining Givens rotations.
529 break;
530 }
531
532 // If we got this far, then:
533 // (a) ijTL refers to the index of the first non-zero
534 // subdiagonal along the diagonal, and
535 // (b) ijBR refers to either:
536 // - the first zero element that occurs after ijTL, or
537 // - the the last diagonal element.
538 // Note that ijTL and ijBR also correspond to the first and
539 // last diagonal elements of the submatrix of interest. Thus,
540 // we may compute the dimension of this submatrix as:
541 m_A11 = ijBR - ijTL + 1;
542
543#ifdef PRINTF
544printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin );
545printf( "FLA_Tevd_v_opz_var2: ijTL = %d\n", ijTL );
546printf( "FLA_Tevd_v_opz_var2: ijBR = %d\n", ijBR );
547printf( "FLA_Tevd_v_opz_var2: m_A11 = %d\n", m_A11 );
548#endif
549
550 // Adjust ij_begin, which gets us ready for the next subproblem, if
551 // there is one.
552 ij_begin = ijBR + 1;
553
554 // Index to the submatrices upon which we will operate.
555 d1 = buff_d + ijTL * inc_d;
556 e1 = buff_e + ijTL * inc_e;
557 G = buff_G + ijTL * rs_G;
558
559 // Search for a batch of eigenvalues, recursing on deflated
560 // subproblems whenever a split occurs. Iteration continues
561 // as long as
562 // (a) there is still matrix left to operate on, and
563 // (b) the number of iterations performed in this batch is
564 // less than n_G.
565 // If/when either of the two above conditions fails to hold,
566 // the function returns.
568 n_G,
569 ijTL,
570 d1, inc_d,
571 e1, inc_e,
572 G, rs_G, cs_G,
573 &n_iter_perf );
574
575 // Record the number of deflations that we observed.
577
578 // Update the maximum number of iterations performed in the
579 // current sweep.
581
582#ifdef PRINTF
583printf( "FLA_Tevd_v_opz_var2: deflations observed = %d\n", n_deflations );
584printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations );
585printf( "FLA_Tevd_v_opz_var2: num iterations = %d\n", n_iter_perf );
586#endif
587
588 // Store the most recent value of ijBR in m_G_sweep_max.
589 // When the sweep is done, this value will contain the minimum
590 // number of rows of G we can apply and safely include all
591 // non-identity rotations that were computed during the
592 // eigenvalue searches.
594
595 // Make sure we haven't exceeded our maximum iteration count.
596 if ( n_iter_prev >= m_A * n_iter_max )
597 {
598#ifdef PRINTF
599printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
600#endif
601 FLA_Abort();
602 //return FLA_FAILURE;
603 }
604 }
605
606 // The sweep is complete. Now we must apply the Givens rotations
607 // that were accumulated during the sweep.
608
609
610 // Recall that the number of columns of U to which we apply
611 // rotations is one more than the number of rotations.
613
614 // Apply the Givens rotations that were computed as part of
615 // the previous batch of iterations.
616 //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
617 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
619 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
620 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
621 m_U,
622 n_U_apply,
624 buff_G, rs_G, cs_G,
625 buff_R, rs_R, cs_R,
626 b_alg );
627
628#ifdef PRINTF
629printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
630#endif
631
632 // Increment the total number of iterations previously performed.
634 }
635
636 // Copy the contents of Q to temporary storage.
638 m_A,
639 m_A,
640 buff_U, rs_U, cs_U,
641 buff_W, rs_W, cs_W );
642
643
644 // Multiply Q by R, overwriting U.
647 2*m_A,
648 m_A,
649 m_A,
650 &rone,
651 ( double* )buff_W, rs_W, 2*cs_W,
652 buff_R, rs_R, cs_R,
653 &rzero,
654 ( double* )buff_U, rs_U, 2*cs_U );
655
656 return n_iter_prev;
657}
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:286

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zcopymt(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), FLA_Tevd_iteracc_v_opd_var1(), and i.

Referenced by FLA_Tevd_v_opt_var2().