libflame revision_anchor
Functions
FLA_Bsvd_v.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Bsvd_compute_shift (FLA_Obj tol, FLA_Obj sminl, FLA_Obj smax, FLA_Obj d, FLA_Obj e, FLA_Obj shift)
 
FLA_Error FLA_Bsvd_compute_shift_ops (int m_A, float tol, float sminl, float smax, float *buff_d, int inc_d, float *buff_e, int inc_e, float *shift)
 
FLA_Error FLA_Bsvd_compute_shift_opd (int m_A, double tol, double sminl, double smax, double *buff_d, int inc_d, double *buff_e, int inc_e, double *shift)
 
FLA_Error FLA_Bsvd_compute_tol_thresh (FLA_Obj tolmul, FLA_Obj maxit, FLA_Obj d, FLA_Obj e, FLA_Obj tol, FLA_Obj thresh)
 
FLA_Error FLA_Bsvd_compute_tol_thresh_ops (int m_A, float tolmul, float maxit, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
 
FLA_Error FLA_Bsvd_compute_tol_thresh_opd (int m_A, double tolmul, double maxit, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
 
FLA_Error FLA_Bsvd_find_converged (FLA_Obj tol, FLA_Obj d, FLA_Obj e, FLA_Obj sminl)
 
FLA_Error FLA_Bsvd_find_converged_ops (int m_A, float tol, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sminl)
 
FLA_Error FLA_Bsvd_find_converged_opd (int m_A, double tol, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sminl)
 
FLA_Error FLA_Bsvd_find_max_min (FLA_Obj d, FLA_Obj e, FLA_Obj smax, FLA_Obj smin)
 
FLA_Error FLA_Bsvd_find_max_min_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *smax, float *smin)
 
FLA_Error FLA_Bsvd_find_max_min_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *smax, double *smin)
 
FLA_Error FLA_Bsvd_find_submatrix_ops (int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Bsvd_find_submatrix_opd (int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Bsvd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj RG, FLA_Obj RH, FLA_Obj W, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 

Function Documentation

◆ FLA_Bsvd_compute_shift()

FLA_Error FLA_Bsvd_compute_shift ( FLA_Obj  tol,
FLA_Obj  sminl,
FLA_Obj  smax,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  shift 
)
14{
15 FLA_Datatype datatype;
16 int m_A;
17 int inc_d;
18 int inc_e;
19
20 datatype = FLA_Obj_datatype( d );
21
23
26
27 switch ( datatype )
28 {
29 case FLA_FLOAT:
30 {
31 float* buff_tol = FLA_FLOAT_PTR( tol );
32 float* buff_sminl = FLA_FLOAT_PTR( sminl );
33 float* buff_smax = FLA_FLOAT_PTR( smax );
34 float* buff_d = FLA_FLOAT_PTR( d );
35 float* buff_e = FLA_FLOAT_PTR( e );
36 float* buff_shift = FLA_FLOAT_PTR( shift );
37
39 *buff_tol,
41 *buff_smax,
44 buff_shift );
45
46 break;
47 }
48
49 case FLA_DOUBLE:
50 {
51 double* buff_tol = FLA_DOUBLE_PTR( tol );
52 double* buff_sminl = FLA_DOUBLE_PTR( sminl );
53 double* buff_smax = FLA_DOUBLE_PTR( smax );
54 double* buff_d = FLA_DOUBLE_PTR( d );
55 double* buff_e = FLA_DOUBLE_PTR( e );
56 double* buff_shift = FLA_DOUBLE_PTR( shift );
57
59 *buff_tol,
61 *buff_smax,
64 buff_shift );
65
66 break;
67 }
68 }
69
70 return FLA_SUCCESS;
71}
FLA_Error FLA_Bsvd_compute_shift_opd(int m_A, double tol, double sminl, double smax, double *buff_d, int inc_d, double *buff_e, int inc_e, double *shift)
Definition FLA_Bsvd_compute_shift.c:130
FLA_Error FLA_Bsvd_compute_shift_ops(int m_A, float tol, float sminl, float smax, float *buff_d, int inc_d, float *buff_e, int inc_e, float *shift)
Definition FLA_Bsvd_compute_shift.c:75
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition FLA_Query.c:137
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
int i
Definition bl1_axmyv2.c:145

References FLA_Bsvd_compute_shift_opd(), FLA_Bsvd_compute_shift_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), and i.

◆ FLA_Bsvd_compute_shift_opd()

FLA_Error FLA_Bsvd_compute_shift_opd ( int  m_A,
double  tol,
double  sminl,
double  smax,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
double shift 
)
137{
138 double hndrth = 0.01;
139 double eps;
140 double* d_first;
141 double* e_last;
142 double* d_last_m1;
143 double* d_last;
144 double sll, temp;
145
147
148 d_first = buff_d + (0 )*inc_d;
149 e_last = buff_e + (m_A-2)*inc_e;
150 d_last_m1 = buff_d + (m_A-2)*inc_d;
151 d_last = buff_d + (m_A-1)*inc_d;
152
153 // If the shift would ruin relative accuracy, set it to zero.
154 if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) )
155 {
156#ifdef PRINTF
157 printf( "FLA_Bsvd_compute_shift_opd: shift would ruin accuracy; setting shift to 0.\n" );
158 printf( " m_A = %d \n", m_A );
159 printf( " tol = %20.15e\n", tol );
160 printf( " sminl = %20.15e\n", sminl );
161 printf( " smax = %20.15e\n", smax );
162 printf( " LHS = %20.15e\n", m_A * tol * ( sminl / smax ) );
163 printf( " max(eps,0.01*tol)= %20.15e\n", max( eps, hndrth * tol ) );
164#endif
165 *shift = 0.0;
166 }
167 else
168 {
169 // Compute the shift from the last 2x2 matrix.
171 e_last,
172 d_last,
173 shift,
174 &temp );
175
176 sll = fabs( *d_first );
177
178 // Check if the shift is negligible; if so, set it to zero.
179 if ( sll > 0.0 )
180 {
181 temp = ( *shift / sll );
182 if ( temp * temp < eps )
183 {
184#ifdef PRINTF
185 printf( "FLA_Bsvd_compute_shift_opd: shift is negligible; setting shift to 0.\n" );
186#endif
187 *shift = 0.0;
188 }
189 }
190 }
191
192 return FLA_SUCCESS;
193}
double FLA_Mach_params_opd(FLA_Machval machval)
Definition FLA_Mach_params.c:74
FLA_Error FLA_Sv_2x2_opd(double *alpha11, double *alpha12, double *alpha22, double *sigma1, double *sigma2)
Definition FLA_Sv_2x2.c:166
dcomplex temp
Definition bl1_axpyv2b.c:301

References FLA_Mach_params_opd(), FLA_Sv_2x2_opd(), i, and temp.

Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_opd_var1().

◆ FLA_Bsvd_compute_shift_ops()

FLA_Error FLA_Bsvd_compute_shift_ops ( int  m_A,
float  tol,
float  sminl,
float  smax,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
float shift 
)
82{
83 float hndrth = 0.01;
84 float eps;
85 float* d_first;
86 float* e_last;
87 float* d_last_m1;
88 float* d_last;
89 float sll, temp;
90
92
93 d_first = buff_d + (0 )*inc_d;
94 e_last = buff_e + (m_A-2)*inc_e;
95 d_last_m1 = buff_d + (m_A-2)*inc_d;
96 d_last = buff_d + (m_A-1)*inc_d;
97
98 // If the shift would ruin relative accuracy, set it to zero.
99 if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) )
100 {
101 *shift = 0.0;
102 }
103 else
104 {
105 // Compute the shift from the last 2x2 matrix.
107 e_last,
108 d_last,
109 shift,
110 &temp );
111
112 sll = fabsf( *d_first );
113
114 // Check if the shift is negligible; if so, set it to zero.
115 if ( sll > 0.0F )
116 {
117 temp = ( *shift / sll );
118 if ( temp * temp < eps )
119 {
120 *shift = 0.0F;
121 }
122 }
123 }
124
125 return FLA_SUCCESS;
126}
float FLA_Mach_params_ops(FLA_Machval machval)
Definition FLA_Mach_params.c:47
FLA_Error FLA_Sv_2x2_ops(float *alpha11, float *alpha12, float *alpha22, float *sigma1, float *sigma2)
Definition FLA_Sv_2x2.c:83

References FLA_Mach_params_ops(), FLA_Sv_2x2_ops(), i, and temp.

Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_ops_var1().

◆ FLA_Bsvd_compute_tol_thresh()

FLA_Error FLA_Bsvd_compute_tol_thresh ( FLA_Obj  tolmul,
FLA_Obj  maxit,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  tol,
FLA_Obj  thresh 
)
16{
17 FLA_Datatype datatype;
18 int n_A;
19 int inc_d;
20 int inc_e;
21
22 datatype = FLA_Obj_datatype( d );
23
25
28
29
30 switch ( datatype )
31 {
32 case FLA_FLOAT:
33 {
36 float* buff_d = FLA_FLOAT_PTR( d );
37 float* buff_e = FLA_FLOAT_PTR( e );
38 float* buff_tol = FLA_FLOAT_PTR( tol );
40
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
56 double* buff_d = FLA_DOUBLE_PTR( d );
57 double* buff_e = FLA_DOUBLE_PTR( e );
58 double* buff_tol = FLA_DOUBLE_PTR( tol );
60
68
69 break;
70 }
71 }
72
73 return FLA_SUCCESS;
74}
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition FLA_Bsvd_compute_tol_thresh.c:137
FLA_Error FLA_Bsvd_compute_tol_thresh_ops(int n_A, float tolmul, float maxitr, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
Definition FLA_Bsvd_compute_tol_thresh.c:78

References FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), and i.

◆ FLA_Bsvd_compute_tol_thresh_opd()

FLA_Error FLA_Bsvd_compute_tol_thresh_opd ( int  m_A,
double  tolmul,
double  maxit,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
double tol,
double thresh 
)
144{
145 double zero = bl1_d0();
146 double smin;
147 double eps, unfl;
148 double mu;
149 int i;
150
151 // Query machine epsilon and the safe minimum.
154
155 // Compute tol as the product of machine epsilon and tolmul.
156 *tol = tolmul * eps;
157
158 // Compute the approximate maximum singular value.
159 // NOT NEEDED unless we're supporting absolute accuracy.
160 //FLA_Bsvd_sinval_find_max( n_A,
161 // buff_d, inc_d,
162 // buff_e, inc_e,
163 // &smax );
164
165 // Compute the approximate minimum singular value.
166 smin = fabs( *buff_d );
167
168 // Skip the accumulation of smin if the first element is zero.
169 if ( smin != zero )
170 {
171 mu = smin;
172 for ( i = 1; i < n_A; ++i )
173 {
174 double* epsilon1 = buff_e + (i-1)*inc_e;
175 double* delta2 = buff_d + (i )*inc_d;
176
177 mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
178 smin = min( smin, mu );
179
180 // Stop early if we encountered a zero.
181 if ( smin == zero ) break;
182 }
183 }
184
185 // Compute thresh either in terms of tol or as a function of the
186 // maximum total number of iterations, the problem size, and the
187 // safe minimum.
188 smin = smin / sqrt( ( double ) n_A );
189 *thresh = max( *tol * smin, maxitr * n_A * n_A * unfl );
190
191 return FLA_SUCCESS;
192}
double bl1_d0(void)
Definition bl1_constants.c:118

References bl1_d0(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

◆ FLA_Bsvd_compute_tol_thresh_ops()

FLA_Error FLA_Bsvd_compute_tol_thresh_ops ( int  m_A,
float  tolmul,
float  maxit,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
float tol,
float thresh 
)
85{
86 float zero = bl1_s0();
87 float smin;
88 float eps, unfl;
89 float mu;
90 int i;
91
92 // Query machine epsilon and the safe minimum.
95
96 // Compute tol as the product of machine epsilon and tolmul.
97 *tol = tolmul * eps;
98
99 // Compute the approximate maximum singular value.
100 // NOT NEEDED unless we're supporting absolute accuracy.
101 //FLA_Bsvd_sinval_find_max( n_A,
102 // buff_d, inc_d,
103 // buff_e, inc_e,
104 // &smax );
105
106 // Compute the approximate minimum singular value.
107 smin = fabsf( *buff_d );
108
109 // Skip the accumulation of smin if the first element is zero.
110 if ( smin != zero )
111 {
112 mu = smin;
113 for ( i = 1; i < n_A; ++i )
114 {
115 float* epsilon1 = buff_e + (i-1)*inc_e;
116 float* delta2 = buff_d + (i )*inc_d;
117
118 mu = fabsf( *delta2 ) * ( mu / ( mu + fabsf( *epsilon1 ) ) );
119 smin = min( smin, mu );
120
121 // Stop early if we encountered a zero.
122 if ( smin == zero ) break;
123 }
124 }
125
126 // Compute thresh either in terms of tol or as a function of the
127 // maximum total number of iterations, the problem size, and the
128 // safe minimum.
129 smin = smin / sqrtf( ( float ) n_A );
130 *thresh = max( *tol * smin, maxitr * n_A * n_A * unfl );
131
132 return FLA_SUCCESS;
133}
float bl1_s0(void)
Definition bl1_constants.c:111

References bl1_s0(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), and FLA_Bsvd_v_ops_var1().

◆ FLA_Bsvd_find_converged()

FLA_Error FLA_Bsvd_find_converged ( FLA_Obj  tol,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  sminl 
)
14{
15 FLA_Datatype datatype;
16 int m_A;
17 int inc_d;
18 int inc_e;
19
20 datatype = FLA_Obj_datatype( d );
21
23
26
27
28 switch ( datatype )
29 {
30 case FLA_FLOAT:
31 {
32 float* buff_tol = FLA_FLOAT_PTR( tol );
33 float* buff_d = FLA_FLOAT_PTR( d );
34 float* buff_e = FLA_FLOAT_PTR( e );
35 float* buff_sminl = FLA_FLOAT_PTR( sminl );
36
38 *buff_tol,
41 buff_sminl );
42
43 break;
44 }
45
46 case FLA_DOUBLE:
47 {
48 double* buff_tol = FLA_DOUBLE_PTR( tol );
49 double* buff_d = FLA_DOUBLE_PTR( d );
50 double* buff_e = FLA_DOUBLE_PTR( e );
51 double* buff_sminl = FLA_DOUBLE_PTR( sminl );
52
54 *buff_tol,
57 buff_sminl );
58
59 break;
60 }
61 }
62
63 return FLA_SUCCESS;
64}
FLA_Error FLA_Bsvd_find_converged_ops(int m_A, float tol, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sminl)
Definition FLA_Bsvd_find_converged.c:68
FLA_Error FLA_Bsvd_find_converged_opd(int m_A, double tol, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sminl)
Definition FLA_Bsvd_find_converged.c:117

References FLA_Bsvd_find_converged_opd(), FLA_Bsvd_find_converged_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), and i.

◆ FLA_Bsvd_find_converged_opd()

FLA_Error FLA_Bsvd_find_converged_opd ( int  m_A,
double  tol,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
double sminl 
)
122{
123 double* epsilon_last;
124 double* delta_last;
125 double mu;
126 int i;
127
129 delta_last = buff_d + (m_A-1)*inc_d;
130
131 // Check convergence at the bottom of the matrix first.
133 {
134 //*epsilon_last = 0.0;
135 *sminl = 0.0;
136 return m_A - 2;
137 }
138
139 // If the last element is not yet converged, check interior elements.
140 // Also, accumulate sminl for later use when it comes time to check
141 // the shift.
142
143 mu = fabs( *buff_d );
144 *sminl = mu;
145
146 for ( i = 0; i < m_A - 1; ++i )
147 {
148 double* epsilon1 = buff_e + (i )*inc_e;
149 double* delta2 = buff_d + (i+1)*inc_d;
150
151 // Check convergence of epsilon1 against the value of mu accumulated
152 // so far.
154 {
155//printf( "FLA_Bsvd_sinval_find_converged: Split occurred in col %d\n", i );
156//printf( "FLA_Bsvd_sinval_find_converged: mu alpha12 = %23.19e %23.19e\n", mu, *epsilon1 );
157//printf( "FLA_Bsvd_sinval_find_converged: alpha22 = %43.19e\n", *delta2 );
158 //*epsilon1 = 0.0;
159 //return FLA_FAILURE;
160 return i;
161 }
162
163 // Update mu and sminl.
164 mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
165 *sminl = min( *sminl, mu );
166 }
167
168 // Return with no convergence found.
169 return FLA_SUCCESS;
170}

References i.

Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_opd_var1().

◆ FLA_Bsvd_find_converged_ops()

FLA_Error FLA_Bsvd_find_converged_ops ( int  m_A,
float  tol,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
float sminl 
)
73{
74 float* epsilon_last;
75 float* delta_last;
76 float mu;
77 int i;
78
80 delta_last = buff_d + (m_A-1)*inc_d;
81
82 // Check convergence at the bottom of the matrix first.
84 {
85 *sminl = 0.0F;
86 return m_A - 2;
87 }
88
89 // If the last element is not yet converged, check interior elements.
90 // Also, accumulate sminl for later use when it comes time to check
91 // the shift.
92
93 mu = fabsf( *buff_d );
94 *sminl = mu;
95
96 for ( i = 0; i < m_A - 1; ++i )
97 {
98 float* epsilon1 = buff_e + (i )*inc_e;
99 float* delta2 = buff_d + (i+1)*inc_d;
100
101 // Check convergence of epsilon1 against the value of mu accumulated
102 // so far.
104 {
105 return i;
106 }
107
108 // Update mu and sminl.
109 mu = fabsf( *delta2 ) * ( mu / ( mu + fabsf( *epsilon1 ) ) );
110 *sminl = min( *sminl, mu );
111 }
112
113 // Return with no convergence found.
114 return FLA_SUCCESS;
115}

References i.

Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_ops_var1().

◆ FLA_Bsvd_find_max_min()

FLA_Error FLA_Bsvd_find_max_min ( FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  smax,
FLA_Obj  smin 
)

◆ FLA_Bsvd_find_max_min_opd()

FLA_Error FLA_Bsvd_find_max_min_opd ( int  m_A,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
double smax,
double smin 
)
108{
109 double smax_cand;
110 double smin_cand;
111 int i;
112
113 smax_cand = fabs( buff_d[ (m_A-1)*inc_d ] );
115
116 for ( i = 0; i < m_A - 1; ++i )
117 {
118 double abs_di = fabs( buff_d[ i*inc_d ] );
119 double abs_ei = fabs( buff_e[ i*inc_e ] );
120
121 // Track the minimum element.
123
124 // Track the maximum element.
127 }
128
129 // Save the results of the search.
130 *smax = smax_cand;
131 *smin = smin_cand;
132
133 return FLA_SUCCESS;
134}

References i.

Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_opd_var1().

◆ FLA_Bsvd_find_max_min_ops()

FLA_Error FLA_Bsvd_find_max_min_ops ( int  m_A,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
float smax,
float smin 
)
73{
74 float smax_cand;
75 float smin_cand;
76 int i;
77
78 smax_cand = fabsf( buff_d[ (m_A-1)*inc_d ] );
80
81 for ( i = 0; i < m_A - 1; ++i )
82 {
83 float abs_di = fabsf( buff_d[ i*inc_d ] );
84 float abs_ei = fabsf( buff_e[ i*inc_e ] );
85
86 // Track the minimum element.
88
89 // Track the maximum element.
92 }
93
94 // Save the results of the search.
95 *smax = smax_cand;
96 *smin = smin_cand;
97
98 return FLA_SUCCESS;
99}

References i.

Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_ops_var1().

◆ FLA_Bsvd_find_submatrix_opd()

FLA_Error FLA_Bsvd_find_submatrix_opd ( int  mn_A,
int  ij_begin,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
int ijTL,
int ijBR 
)
79{
80 double rzero = bl1_d0();
81 int ij_tl;
82 int ij_br;
83
84 // Search for the first non-zero superdiagonal element starting at
85 // the index specified by ij_begin.
86 for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl )
87 {
88 double* e1 = buff_e + (ij_tl )*inc_e;
89
90 // If we find a non-zero element, record it and break out of this
91 // loop.
92 if ( *e1 != rzero )
93 {
94#ifdef PRINTF
95 printf( "FLA_Bsvd_find_submatrix_opd: found non-zero superdiagonal element\n" );
96 printf( " e[%3d] = %22.19e\n", ij_tl, *e1 );
97#endif
98 *ijTL = ij_tl;
99 break;
100 }
101 }
102
103 // If ij_tl was incremented all the way up to mn_A - 1, then we didn't
104 // find any non-zeros.
105 if ( ij_tl == mn_A - 1 )
106 {
107#ifdef PRINTF
108 printf( "FLA_Bsvd_find_submatrix_opd: no submatrices found.\n" );
109#endif
110 return FLA_FAILURE;
111 }
112
113 // If we've gotten this far, then a non-zero superdiagonal element was
114 // found. Now we must walk the remaining portion of the superdiagonal
115 // to find the first zero element, or if one is not found, we simply
116 // use the last element of the superdiagonal.
117 for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br )
118 {
119 double* e1 = buff_e + (ij_br )*inc_e;
120
121 // If we find a zero element, record it and break out of this
122 // loop.
123 if ( *e1 == rzero )
124 {
125#ifdef PRINTF
126 printf( "FLA_Bsvd_find_submatrix_opd: found zero superdiagonal element\n" );
127 printf( " e[%3d] = %22.19e\n", ij_br, *e1 );
128#endif
129 break;
130 }
131 }
132
133 // If a zero element was found, then ij_br should hold the index of
134 // that element. If a zero element was not found, then ij_br should
135 // hold mn_A - 1. Either way, we save the value and return success.
136 *ijBR = ij_br;
137
138 return FLA_SUCCESS;
139}

References bl1_d0(), and i.

Referenced by FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

◆ FLA_Bsvd_find_submatrix_ops()

FLA_Error FLA_Bsvd_find_submatrix_ops ( int  mn_A,
int  ij_begin,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
int ijTL,
int ijBR 
)
20{
21 float rzero = bl1_s0();
22 int ij_tl;
23 int ij_br;
24
25 // Search for the first non-zero superdiagonal element starting at
26 // the index specified by ij_begin.
27 for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl )
28 {
29 float* e1 = buff_e + (ij_tl )*inc_e;
30
31 // If we find a non-zero element, record it and break out of this
32 // loop.
33 if ( *e1 != rzero )
34 {
35 *ijTL = ij_tl;
36 break;
37 }
38 }
39
40 // If ij_tl was incremented all the way up to mn_A - 1, then we didn't
41 // find any non-zeros.
42 if ( ij_tl == mn_A - 1 )
43 {
44 return FLA_FAILURE;
45 }
46
47 // If we've gotten this far, then a non-zero superdiagonal element was
48 // found. Now we must walk the remaining portion of the superdiagonal
49 // to find the first zero element, or if one is not found, we simply
50 // use the last element of the superdiagonal.
51 for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br )
52 {
53 float* e1 = buff_e + (ij_br )*inc_e;
54
55 // If we find a zero element, record it and break out of this
56 // loop.
57 if ( *e1 == rzero )
58 {
59 break;
60 }
61 }
62
63 // If a zero element was found, then ij_br should hold the index of
64 // that element. If a zero element was not found, then ij_br should
65 // hold mn_A - 1. Either way, we save the value and return success.
66 *ijBR = ij_br;
67
68 return FLA_SUCCESS;
69}

References bl1_s0(), and i.

Referenced by FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), and FLA_Bsvd_v_ops_var1().

◆ FLA_Bsvd_v_opc_var1()

FLA_Error FLA_Bsvd_v_opc_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
686{
687 scomplex one = bl1_c1();
688 float rzero = bl1_s0();
689
690 int maxitr = 6;
691
692 float eps;
693 float tolmul;
694 float tol;
695 float thresh;
696
697 scomplex* G;
698 scomplex* H;
699 float* d1;
700 float* e1;
701 int r_val;
702 int done;
703 int m_GH_sweep_max;
704 int ij_begin;
705 int ijTL, ijBR;
706 int m_A11;
707 int n_iter_perf;
708 int n_UV_apply;
710 int n_deflations;
711 int n_iter_prev;
713
714 // Compute some convergence constants.
716 tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
718 tolmul,
719 maxitr,
720 buff_d, inc_d,
721 buff_e, inc_e,
722 &tol,
723 &thresh );
724
725 // Initialize our completion flag.
726 done = FALSE;
727
728 // Initialize a counter that holds the maximum number of rows of G
729 // and H that we would need to initialize for the next sweep.
731
732 // Initialize a counter for the total number of iterations performed.
733 n_iter_prev = 0;
734
735 // Iterate until the matrix has completely deflated.
736 for ( total_deflations = 0; done != TRUE; )
737 {
738
739 // Initialize G and H to contain only identity rotations.
741 n_GH,
742 &one,
743 buff_G, rs_G, cs_G );
745 n_GH,
746 &one,
747 buff_H, rs_H, cs_H );
748
749 // Keep track of the maximum number of iterations performed in the
750 // current sweep. This is used when applying the sweep's Givens
751 // rotations.
753
754 // Perform a sweep: Move through the matrix and perform a bidiagonal
755 // SVD on each non-zero submatrix that is encountered. During the
756 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
757 for ( ij_begin = 0; ij_begin < min_m_n; )
758 {
759 // Search for the first submatrix along the diagonal that is
760 // bounded by zeroes (or endpoints of the matrix). If no
761 // submatrix is found (ie: if the entire superdiagonal is zero
762 // then FLA_FAILURE is returned. This function also inspects
763 // superdiagonal elements for proximity to zero. If a given
764 // element is close enough to zero, then it is deemed
765 // converged and manually set to zero.
767 ij_begin,
768 buff_d, inc_d,
769 buff_e, inc_e,
770 &ijTL,
771 &ijBR );
772
773 // Verify that a submatrix was found. If one was not found,
774 // then we are done with the current sweep. Furthermore, if
775 // a submatrix was not found AND we began our search at the
776 // beginning of the matrix (ie: ij_begin == 0), then the
777 // matrix has completely deflated and so we are done with
778 // Francis step iteration.
779 if ( r_val == FLA_FAILURE )
780 {
781 if ( ij_begin == 0 )
782 {
783 done = TRUE;
784 }
785
786 // Break out of the current sweep so we can apply the last
787 // remaining Givens rotations.
788 break;
789 }
790
791 // If we got this far, then:
792 // (a) ijTL refers to the index of the first non-zero
793 // superdiagonal along the diagonal, and
794 // (b) ijBR refers to either:
795 // - the first zero element that occurs after ijTL, or
796 // - the the last diagonal element.
797 // Note that ijTL and ijBR also correspond to the first and
798 // last diagonal elements of the submatrix of interest. Thus,
799 // we may compute the dimension of this submatrix as:
800 m_A11 = ijBR - ijTL + 1;
801
802 // Adjust ij_begin, which gets us ready for the next submatrix
803 // search in the current sweep.
804 ij_begin = ijBR + 1;
805
806 // Index to the submatrices upon which we will operate.
807 d1 = buff_d + ijTL * inc_d;
808 e1 = buff_e + ijTL * inc_e;
809 G = buff_G + ijTL * rs_G;
810 H = buff_H + ijTL * rs_H;
811
812 // Search for a batch of singular values, recursing on deflated
813 // subproblems whenever a split occurs. Iteration continues as
814 // long as
815 // (a) there is still matrix left to operate on, and
816 // (b) the number of iterations performed in this batch is
817 // less than n_G.
818 // If/when either of the two above conditions fails to hold,
819 // the function returns.
821 n_GH,
822 ijTL,
823 tol,
824 thresh,
825 d1, inc_d,
826 e1, inc_e,
827 G, rs_G, cs_G,
828 H, rs_H, cs_H,
829 &n_iter_perf );
830
831 // Record the number of deflations that were observed.
833
834 // Update the maximum number of iterations performed in the
835 // current sweep.
837
838 // Store the most recent value of ijBR in m_G_sweep_max.
839 // When the sweep is done, this value will contain the minimum
840 // number of rows of G and H we can apply and safely include all
841 // non-identity rotations that were computed during the
842 // singular value searches.
844
845 // Make sure we haven't exceeded our maximum iteration count.
846 if ( n_iter_prev >= n_iter_max * min_m_n )
847 {
848 FLA_Abort();
849 //return FLA_FAILURE;
850 }
851 }
852
853 // The sweep is complete. Now we must apply the Givens rotations
854 // that were accumulated during the sweep.
855
856 // Recall that the number of columns of U and V to which we apply
857 // rotations is one more than the number of rotations.
859
860 // Apply the Givens rotations. Note that we only apply k sets of
861 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
862 // apply to n_UV_apply columns of U and V since this is the most we
863 // need to touch given the most recent value stored to m_GH_sweep_max.
864 //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
866 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
867 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
868 m_U,
870 buff_G, rs_G, cs_G,
871 buff_U, rs_U, cs_U,
872 b_alg );
873 //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
875 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
876 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
877 m_V,
879 buff_H, rs_H, cs_H,
880 buff_V, rs_V, cs_V,
881 b_alg );
882
883 // Increment the total number of iterations previously performed.
885 }
886
887 // Make all the singular values positive.
888 {
889 int i;
890 float minus_one = bl1_sm1();
891
892 for ( i = 0; i < min_m_n; ++i )
893 {
894 if ( buff_d[ (i )*inc_d ] < rzero )
895 {
896 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
897
898 // Scale the right singular vectors.
900 m_V,
901 &minus_one,
902 buff_V + (i )*cs_V, rs_V );
903 }
904 }
905 }
906
907 return FLA_SUCCESS;
908}
FLA_Error FLA_Apply_G_rf_blc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:157
FLA_Error FLA_Bsvd_find_submatrix_ops(int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition FLA_Bsvd_find_submatrix.c:14
FLA_Error FLA_Bsvd_iteracc_v_ops_var1(int m_A, int n_GH, int ijTL, float tol, float thresh, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition FLA_Bsvd_iteracc_v_opt_var1.c:13
void FLA_Abort(void)
Definition FLA_Error.c:248
void bl1_csscalv(conj1_t conj, int n, float *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:35
scomplex bl1_c1(void)
Definition bl1_constants.c:61
float bl1_sm1(void)
Definition bl1_constants.c:175
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opc_var2()

FLA_Error FLA_Bsvd_v_opc_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float buff_RG,
int  rs_RG,
int  cs_RG,
float buff_RH,
int  rs_RH,
int  cs_RH,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
544{
546
547 return FLA_SUCCESS;
548}

References i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opd_var1()

FLA_Error FLA_Bsvd_v_opd_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double buff_U,
int  rs_U,
int  cs_U,
double buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
409{
410 dcomplex one = bl1_z1();
411 double rzero = bl1_d0();
412
413 int maxitr = 6;
414
415 double eps;
416 double tolmul;
417 double tol;
418 double thresh;
419
420 dcomplex* G;
421 dcomplex* H;
422 double* d1;
423 double* e1;
424 int r_val;
425 int done;
426 int m_GH_sweep_max;
427 int ij_begin;
428 int ijTL, ijBR;
429 int m_A11;
430 int n_iter_perf;
431 int n_UV_apply;
433 int n_deflations;
434 int n_iter_prev;
436
437 // Compute some convergence constants.
439 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
441 tolmul,
442 maxitr,
443 buff_d, inc_d,
444 buff_e, inc_e,
445 &tol,
446 &thresh );
447#ifdef PRINTF
448 printf( "FLA_Bsvd_v_opd_var1: tolmul = %12.6e\n", tolmul );
449 printf( "FLA_Bsvd_v_opd_var1: tol = %12.6e\n", tol );
450 printf( "FLA_Bsvd_v_opd_var1: thresh = %12.6e\n", thresh );
451#endif
452
453 // Initialize our completion flag.
454 done = FALSE;
455
456 // Initialize a counter that holds the maximum number of rows of G
457 // and H that we would need to initialize for the next sweep.
459
460 // Initialize a counter for the total number of iterations performed.
461 n_iter_prev = 0;
462
463 // Iterate until the matrix has completely deflated.
464 for ( total_deflations = 0; done != TRUE; )
465 {
466
467 // Initialize G and H to contain only identity rotations.
469 n_GH,
470 &one,
471 buff_G, rs_G, cs_G );
473 n_GH,
474 &one,
475 buff_H, rs_H, cs_H );
476
477 // Keep track of the maximum number of iterations performed in the
478 // current sweep. This is used when applying the sweep's Givens
479 // rotations.
481
482 // Perform a sweep: Move through the matrix and perform a bidiagonal
483 // SVD on each non-zero submatrix that is encountered. During the
484 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
485 for ( ij_begin = 0; ij_begin < min_m_n; )
486 {
487
488#ifdef PRINTF
489 if ( ij_begin == 0 )
490 printf( "FLA_Bsvd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
491#endif
492
493 // Search for the first submatrix along the diagonal that is
494 // bounded by zeroes (or endpoints of the matrix). If no
495 // submatrix is found (ie: if the entire superdiagonal is zero
496 // then FLA_FAILURE is returned. This function also inspects
497 // superdiagonal elements for proximity to zero. If a given
498 // element is close enough to zero, then it is deemed
499 // converged and manually set to zero.
501 ij_begin,
502 buff_d, inc_d,
503 buff_e, inc_e,
504 &ijTL,
505 &ijBR );
506
507 // Verify that a submatrix was found. If one was not found,
508 // then we are done with the current sweep. Furthermore, if
509 // a submatrix was not found AND we began our search at the
510 // beginning of the matrix (ie: ij_begin == 0), then the
511 // matrix has completely deflated and so we are done with
512 // Francis step iteration.
513 if ( r_val == FLA_FAILURE )
514 {
515 if ( ij_begin == 0 )
516 {
517#ifdef PRINTF
518 printf( "FLA_Bsvd_v_opd_var1: superdiagonal is completely zero.\n" );
519 printf( "FLA_Bsvd_v_opd_var1: Francis iteration is done!\n" );
520#endif
521 done = TRUE;
522 }
523
524 // Break out of the current sweep so we can apply the last
525 // remaining Givens rotations.
526 break;
527 }
528
529 // If we got this far, then:
530 // (a) ijTL refers to the index of the first non-zero
531 // superdiagonal along the diagonal, and
532 // (b) ijBR refers to either:
533 // - the first zero element that occurs after ijTL, or
534 // - the the last diagonal element.
535 // Note that ijTL and ijBR also correspond to the first and
536 // last diagonal elements of the submatrix of interest. Thus,
537 // we may compute the dimension of this submatrix as:
538 m_A11 = ijBR - ijTL + 1;
539
540#ifdef PRINTF
541 printf( "FLA_Bsvd_v_opd_var1: ij_begin = %d\n", ij_begin );
542 printf( "FLA_Bsvd_v_opd_var1: ijTL = %d\n", ijTL );
543 printf( "FLA_Bsvd_v_opd_var1: ijBR = %d\n", ijBR );
544 printf( "FLA_Bsvd_v_opd_var1: m_A11 = %d\n", m_A11 );
545#endif
546
547 // Adjust ij_begin, which gets us ready for the next submatrix
548 // search in the current sweep.
549 ij_begin = ijBR + 1;
550
551 // Index to the submatrices upon which we will operate.
552 d1 = buff_d + ijTL * inc_d;
553 e1 = buff_e + ijTL * inc_e;
554 G = buff_G + ijTL * rs_G;
555 H = buff_H + ijTL * rs_H;
556
557 // Search for a batch of singular values, recursing on deflated
558 // subproblems whenever a split occurs. Iteration continues as
559 // long as
560 // (a) there is still matrix left to operate on, and
561 // (b) the number of iterations performed in this batch is
562 // less than n_G.
563 // If/when either of the two above conditions fails to hold,
564 // the function returns.
566 n_GH,
567 ijTL,
568 tol,
569 thresh,
570 d1, inc_d,
571 e1, inc_e,
572 G, rs_G, cs_G,
573 H, rs_H, cs_H,
574 &n_iter_perf );
575
576 // Record the number of deflations that were observed.
578
579 // Update the maximum number of iterations performed in the
580 // current sweep.
582
583#ifdef PRINTF
584 printf( "FLA_Bsvd_v_opd_var1: deflations observed = %d\n", n_deflations );
585 printf( "FLA_Bsvd_v_opd_var1: total deflations observed = %d\n", total_deflations );
586 printf( "FLA_Bsvd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
587#endif
588
589 // Store the most recent value of ijBR in m_G_sweep_max.
590 // When the sweep is done, this value will contain the minimum
591 // number of rows of G and H we can apply and safely include all
592 // non-identity rotations that were computed during the
593 // singular value searches.
595
596 // Make sure we haven't exceeded our maximum iteration count.
597 if ( n_iter_prev >= n_iter_max * min_m_n )
598 {
599#ifdef PRINTF
600 printf( "FLA_Bsvd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
601#endif
602 FLA_Abort();
603 //return FLA_FAILURE;
604 }
605 }
606
607 // The sweep is complete. Now we must apply the Givens rotations
608 // that were accumulated during the sweep.
609
610 // Recall that the number of columns of U and V to which we apply
611 // rotations is one more than the number of rotations.
613
614#ifdef PRINTF
615 printf( "FLA_Bsvd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
616 printf( "FLA_Bsvd_v_opd_var1: m_U = %d\n", m_U );
617 printf( "FLA_Bsvd_v_opd_var1: m_V = %d\n", m_V );
618 printf( "FLA_Bsvd_v_opd_var1: napp= %d\n", n_UV_apply );
619#endif
620 // Apply the Givens rotations. Note that we only apply k sets of
621 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
622 // apply to n_UV_apply columns of U and V since this is the most we
623 // need to touch given the most recent value stored to m_GH_sweep_max.
624 //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
626 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
627 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
628 m_U,
630 buff_G, rs_G, cs_G,
631 buff_U, rs_U, cs_U,
632 b_alg );
633 //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
635 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
636 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
637 m_V,
639 buff_H, rs_H, cs_H,
640 buff_V, rs_V, cs_V,
641 b_alg );
642
643 // Increment the total number of iterations previously performed.
645
646#ifdef PRINTF
647 printf( "FLA_Bsvd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
648#endif
649 }
650
651 // Make all the singular values positive.
652 {
653 int i;
654 double minus_one = bl1_dm1();
655
656 for ( i = 0; i < min_m_n; ++i )
657 {
658 if ( buff_d[ (i )*inc_d ] < rzero )
659 {
660 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
661
662 // Scale the right singular vectors.
664 m_V,
665 &minus_one,
666 buff_V + (i )*cs_V, rs_V );
667 }
668 }
669 }
670
671 return n_iter_prev;
672}
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition FLA_Bsvd_find_submatrix.c:73
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition FLA_Bsvd_iteracc_v_opt_var1.c:176
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
double bl1_dm1(void)
Definition bl1_constants.c:182
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78
Definition blis_type_defs.h:138

References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opd_var2()

FLA_Error FLA_Bsvd_v_opd_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double buff_RG,
int  rs_RG,
int  cs_RG,
double buff_RH,
int  rs_RH,
int  cs_RH,
double buff_W,
int  rs_W,
int  cs_W,
double buff_U,
int  rs_U,
int  cs_U,
double buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
229{
230 dcomplex one = bl1_z1();
231 double rone = bl1_d1();
232 double rzero = bl1_d0();
233
234 int maxitr = 6;
235
236 double eps;
237 double tolmul;
238 double tol;
239 double thresh;
240
241 dcomplex* G;
242 dcomplex* H;
243 double* d1;
244 double* e1;
245 int r_val;
246 int done;
247 int m_GH_sweep_max;
248 int ij_begin;
249 int ijTL, ijBR;
250 int m_A11;
251 int n_iter_perf;
252 int n_UV_apply;
254 int n_deflations;
255 int n_iter_prev;
257
258 // Compute some convergence constants.
260 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
262 tolmul,
263 maxitr,
264 buff_d, inc_d,
265 buff_e, inc_e,
266 &tol,
267 &thresh );
268
269 // Initialize our completion flag.
270 done = FALSE;
271
272 // Initialize a counter that holds the maximum number of rows of G
273 // and H that we would need to initialize for the next sweep.
275
276 // Initialize a counter for the total number of iterations performed.
277 n_iter_prev = 0;
278
279 // Initialize RG and RH to identity.
281 buff_RG, rs_RG, cs_RG );
283 buff_RH, rs_RH, cs_RH );
284
285 // Iterate until the matrix has completely deflated.
286 for ( total_deflations = 0; done != TRUE; )
287 {
288
289 // Initialize G and H to contain only identity rotations.
291 n_GH,
292 &one,
293 buff_G, rs_G, cs_G );
295 n_GH,
296 &one,
297 buff_H, rs_H, cs_H );
298
299 // Keep track of the maximum number of iterations performed in the
300 // current sweep. This is used when applying the sweep's Givens
301 // rotations.
303
304 // Perform a sweep: Move through the matrix and perform a bidiagonal
305 // SVD on each non-zero submatrix that is encountered. During the
306 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
307 for ( ij_begin = 0; ij_begin < min_m_n; )
308 {
309
310#ifdef PRINTF
311if ( ij_begin == 0 )
312printf( "FLA_Bsvd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
313#endif
314
315 // Search for the first submatrix along the diagonal that is
316 // bounded by zeroes (or endpoints of the matrix). If no
317 // submatrix is found (ie: if the entire superdiagonal is zero
318 // then FLA_FAILURE is returned. This function also inspects
319 // superdiagonal elements for proximity to zero. If a given
320 // element is close enough to zero, then it is deemed
321 // converged and manually set to zero.
323 ij_begin,
324 buff_d, inc_d,
325 buff_e, inc_e,
326 &ijTL,
327 &ijBR );
328
329 // Verify that a submatrix was found. If one was not found,
330 // then we are done with the current sweep. Furthermore, if
331 // a submatrix was not found AND we began our search at the
332 // beginning of the matrix (ie: ij_begin == 0), then the
333 // matrix has completely deflated and so we are done with
334 // Francis step iteration.
335 if ( r_val == FLA_FAILURE )
336 {
337 if ( ij_begin == 0 )
338 {
339#ifdef PRINTF
340printf( "FLA_Bsvd_v_opd_var2: superdiagonal is completely zero.\n" );
341printf( "FLA_Bsvd_v_opd_var2: Francis iteration is done!\n" );
342#endif
343 done = TRUE;
344 }
345
346 // Break out of the current sweep so we can apply the last
347 // remaining Givens rotations.
348 break;
349 }
350
351 // If we got this far, then:
352 // (a) ijTL refers to the index of the first non-zero
353 // superdiagonal along the diagonal, and
354 // (b) ijBR refers to either:
355 // - the first zero element that occurs after ijTL, or
356 // - the the last diagonal element.
357 // Note that ijTL and ijBR also correspond to the first and
358 // last diagonal elements of the submatrix of interest. Thus,
359 // we may compute the dimension of this submatrix as:
360 m_A11 = ijBR - ijTL + 1;
361
362#ifdef PRINTF
363printf( "FLA_Bsvd_v_opd_var2: ij_begin = %d\n", ij_begin );
364printf( "FLA_Bsvd_v_opd_var2: ijTL = %d\n", ijTL );
365printf( "FLA_Bsvd_v_opd_var2: ijBR = %d\n", ijBR );
366printf( "FLA_Bsvd_v_opd_var2: m_A11 = %d\n", m_A11 );
367#endif
368
369 // Adjust ij_begin, which gets us ready for the next submatrix
370 // search in the current sweep.
371 ij_begin = ijBR + 1;
372
373 // Index to the submatrices upon which we will operate.
374 d1 = buff_d + ijTL * inc_d;
375 e1 = buff_e + ijTL * inc_e;
376 G = buff_G + ijTL * rs_G;
377 H = buff_H + ijTL * rs_H;
378
379 // Search for a batch of singular values, recursing on deflated
380 // subproblems whenever possible. A new singular value search is
381 // performed as long as
382 // (a) there is still matrix left to operate on, and
383 // (b) the number of iterations performed in this batch is
384 // less than n_G.
385 // If/when either of the two above conditions fails to hold,
386 // the function returns.
388 n_GH,
389 ijTL,
390 tol,
391 thresh,
392 d1, inc_d,
393 e1, inc_e,
394 G, rs_G, cs_G,
395 H, rs_H, cs_H,
396 &n_iter_perf );
397
398 // Record the number of deflations that occurred.
400
401 // Update the maximum number of iterations performed in the
402 // current sweep.
404
405#ifdef PRINTF
406printf( "FLA_Bsvd_v_opd_var2: deflations observed = %d\n", n_deflations );
407printf( "FLA_Bsvd_v_opd_var2: total deflations observed = %d\n", total_deflations );
408printf( "FLA_Bsvd_v_opd_var2: num iterations performed = %d\n", n_iter_perf );
409#endif
410
411 // Store the most recent value of ijBR in m_G_sweep_max.
412 // When the sweep is done, this value will contain the minimum
413 // number of rows of G and H we can apply and safely include all
414 // non-identity rotations that were computed during the
415 // singular value searches.
417
418 }
419
420 // The sweep is complete. Now we must apply the Givens rotations
421 // that were accumulated during the sweep.
422
423 // Recall that the number of columns of U and V to which we apply
424 // rotations is one more than the number of rotations.
426
427
428#ifdef PRINTF
429printf( "FLA_Bsvd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
430printf( "FLA_Bsvd_v_opd_var2: m_U = %d\n", m_U );
431printf( "FLA_Bsvd_v_opd_var2: m_V = %d\n", m_V );
432printf( "FLA_Bsvd_v_opd_var2: napp= %d\n", n_UV_apply );
433#endif
434
435 // Apply the Givens rotations. Note that we only apply k sets of
436 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
437 // apply to n_UV_apply columns of U and V since this is the most we
438 // need to touch given the most recent value stored to m_GH_sweep_max.
439 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
441 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
442 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
443 min_m_n,
446 buff_G, rs_G, cs_G,
448 b_alg );
449 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
451 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
452 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
453 min_m_n,
456 buff_H, rs_H, cs_H,
458 b_alg );
459
460 // Increment the total number of iterations previously performed.
462
463#ifdef PRINTF
464printf( "FLA_Bsvd_v_opd_var2: total number of iterations performed: %d\n", n_iter_prev );
465#endif
466 }
467
468 // Copy the contents of Q to temporary storage.
470 m_U,
471 m_V,
472 buff_U, rs_U, cs_U,
473 buff_W, rs_W, cs_W );
474// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
475
476 // Multiply U by R, overwriting U.
479 m_U,
480 m_V,
481 m_V,
482 &rone,
483 ( double* )buff_W, rs_W, cs_W,
485 &rzero,
486 ( double* )buff_U, rs_U, cs_U );
487
489 m_V,
490 m_V,
491 buff_V, rs_V, cs_V,
492 buff_W, rs_W, cs_W );
493
494 // Multiply V by R, overwriting V.
497 m_V,
498 m_V,
499 m_V,
500 &rone,
501 ( double* )buff_W, rs_W, cs_W,
503 &rzero,
504 ( double* )buff_V, rs_V, cs_V );
505
506 // Make all the singular values positive.
507 {
508 int i;
509 double minus_one = bl1_dm1();
510
511 for ( i = 0; i < min_m_n; ++i )
512 {
513 if ( buff_d[ (i )*inc_d ] < rzero )
514 {
515 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
516
517 // Scale the right singular vectors.
519 m_V,
520 &minus_one,
521 buff_V + (i )*cs_V, rs_V );
522 }
523 }
524 }
525
526 return n_iter_prev;
527}
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3b.c:135
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_copymt.c:148
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition bl1_gemm.c:274
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition bl1_ident.c:32
double bl1_d1(void)
Definition bl1_constants.c:54
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_ops_var1()

FLA_Error FLA_Bsvd_v_ops_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float buff_U,
int  rs_U,
int  cs_U,
float buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
169{
170 scomplex one = bl1_c1();
171 float rzero = bl1_s0();
172
173 int maxitr = 6;
174
175 float eps;
176 float tolmul;
177 float tol;
178 float thresh;
179
180 scomplex* G;
181 scomplex* H;
182 float* d1;
183 float* e1;
184 int r_val;
185 int done;
186 int m_GH_sweep_max;
187 int ij_begin;
188 int ijTL, ijBR;
189 int m_A11;
190 int n_iter_perf;
191 int n_UV_apply;
193 int n_deflations;
194 int n_iter_prev;
196
197 // Compute some convergence constants.
199 tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
201 tolmul,
202 maxitr,
203 buff_d, inc_d,
204 buff_e, inc_e,
205 &tol,
206 &thresh );
207
208 // Initialize our completion flag.
209 done = FALSE;
210
211 // Initialize a counter that holds the maximum number of rows of G
212 // and H that we would need to initialize for the next sweep.
214
215 // Initialize a counter for the total number of iterations performed.
216 n_iter_prev = 0;
217
218 // Iterate until the matrix has completely deflated.
219 for ( total_deflations = 0; done != TRUE; )
220 {
221
222 // Initialize G and H to contain only identity rotations.
224 n_GH,
225 &one,
226 buff_G, rs_G, cs_G );
228 n_GH,
229 &one,
230 buff_H, rs_H, cs_H );
231
232 // Keep track of the maximum number of iterations performed in the
233 // current sweep. This is used when applying the sweep's Givens
234 // rotations.
236
237 // Perform a sweep: Move through the matrix and perform a bidiagonal
238 // SVD on each non-zero submatrix that is encountered. During the
239 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
240 for ( ij_begin = 0; ij_begin < min_m_n; )
241 {
242
243 // Search for the first submatrix along the diagonal that is
244 // bounded by zeroes (or endpoints of the matrix). If no
245 // submatrix is found (ie: if the entire superdiagonal is zero
246 // then FLA_FAILURE is returned. This function also inspects
247 // superdiagonal elements for proximity to zero. If a given
248 // element is close enough to zero, then it is deemed
249 // converged and manually set to zero.
251 ij_begin,
252 buff_d, inc_d,
253 buff_e, inc_e,
254 &ijTL,
255 &ijBR );
256
257 // Verify that a submatrix was found. If one was not found,
258 // then we are done with the current sweep. Furthermore, if
259 // a submatrix was not found AND we began our search at the
260 // beginning of the matrix (ie: ij_begin == 0), then the
261 // matrix has completely deflated and so we are done with
262 // Francis step iteration.
263 if ( r_val == FLA_FAILURE )
264 {
265 if ( ij_begin == 0 )
266 {
267 done = TRUE;
268 }
269
270 // Break out of the current sweep so we can apply the last
271 // remaining Givens rotations.
272 break;
273 }
274
275 // If we got this far, then:
276 // (a) ijTL refers to the index of the first non-zero
277 // superdiagonal along the diagonal, and
278 // (b) ijBR refers to either:
279 // - the first zero element that occurs after ijTL, or
280 // - the the last diagonal element.
281 // Note that ijTL and ijBR also correspond to the first and
282 // last diagonal elements of the submatrix of interest. Thus,
283 // we may compute the dimension of this submatrix as:
284 m_A11 = ijBR - ijTL + 1;
285
286 // Adjust ij_begin, which gets us ready for the next submatrix
287 // search in the current sweep.
288 ij_begin = ijBR + 1;
289
290 // Index to the submatrices upon which we will operate.
291 d1 = buff_d + ijTL * inc_d;
292 e1 = buff_e + ijTL * inc_e;
293 G = buff_G + ijTL * rs_G;
294 H = buff_H + ijTL * rs_H;
295
296 // Search for a batch of singular values, recursing on deflated
297 // subproblems whenever a split occurs. Iteration continues as
298 // long as
299 // (a) there is still matrix left to operate on, and
300 // (b) the number of iterations performed in this batch is
301 // less than n_G.
302 // If/when either of the two above conditions fails to hold,
303 // the function returns.
305 n_GH,
306 ijTL,
307 tol,
308 thresh,
309 d1, inc_d,
310 e1, inc_e,
311 G, rs_G, cs_G,
312 H, rs_H, cs_H,
313 &n_iter_perf );
314
315 // Record the number of deflations that were observed.
317
318 // Update the maximum number of iterations performed in the
319 // current sweep.
321
322 // Store the most recent value of ijBR in m_G_sweep_max.
323 // When the sweep is done, this value will contain the minimum
324 // number of rows of G and H we can apply and safely include all
325 // non-identity rotations that were computed during the
326 // singular value searches.
328
329 // Make sure we haven't exceeded our maximum iteration count.
330 if ( n_iter_prev >= n_iter_max * min_m_n )
331 {
332 FLA_Abort();
333 //return FLA_FAILURE;
334 }
335 }
336
337 // The sweep is complete. Now we must apply the Givens rotations
338 // that were accumulated during the sweep.
339
340 // Recall that the number of columns of U and V to which we apply
341 // rotations is one more than the number of rotations.
343
344 // Apply the Givens rotations. Note that we only apply k sets of
345 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
346 // apply to n_UV_apply columns of U and V since this is the most we
347 // need to touch given the most recent value stored to m_GH_sweep_max.
348 //FLA_Apply_G_rf_bls_var5( n_iter_perf_sweep_max,
350 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
351 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
352 m_U,
354 buff_G, rs_G, cs_G,
355 buff_U, rs_U, cs_U,
356 b_alg );
357 //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
359 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
360 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
361 m_V,
363 buff_H, rs_H, cs_H,
364 buff_V, rs_V, cs_V,
365 b_alg );
366
367 // Increment the total number of iterations previously performed.
369
370 }
371
372 // Make all the singular values positive.
373 {
374 int i;
375 float minus_one = bl1_sm1();
376
377 for ( i = 0; i < min_m_n; ++i )
378 {
379 if ( buff_d[ (i )*inc_d ] < rzero )
380 {
381 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
382
383 // Scale the right singular vectors.
385 m_V,
386 &minus_one,
387 buff_V + (i )*cs_V, rs_V );
388 }
389 }
390 }
391
392 return n_iter_prev;
393}
FLA_Error FLA_Apply_G_rf_bls_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:99
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_ops_var2()

FLA_Error FLA_Bsvd_v_ops_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float buff_RG,
int  rs_RG,
int  cs_RG,
float buff_RH,
int  rs_RH,
int  cs_RH,
float buff_W,
int  rs_W,
int  cs_W,
float buff_U,
int  rs_U,
int  cs_U,
float buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
206{
208
209 return FLA_SUCCESS;
210}

References i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opt_var1()

FLA_Error FLA_Bsvd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
16{
18 FLA_Datatype datatype;
19 int m_U, m_V, n_GH;
20 int inc_d;
21 int inc_e;
22 int rs_G, cs_G;
23 int rs_H, cs_H;
24 int rs_U, cs_U;
25 int rs_V, cs_V;
26
27 datatype = FLA_Obj_datatype( U );
28
29 m_U = FLA_Obj_length( U );
30 m_V = FLA_Obj_length( V );
31 n_GH = FLA_Obj_width( G );
32
35
38
41
44
47
48
49 switch ( datatype )
50 {
51 case FLA_FLOAT:
52 {
53 float* buff_d = FLA_FLOAT_PTR( d );
54 float* buff_e = FLA_FLOAT_PTR( e );
57 float* buff_U = FLA_FLOAT_PTR( U );
58 float* buff_V = FLA_FLOAT_PTR( V );
59
61 m_U,
62 m_V,
63 n_GH,
71 b_alg );
72
73 break;
74 }
75
76 case FLA_DOUBLE:
77 {
78 double* buff_d = FLA_DOUBLE_PTR( d );
79 double* buff_e = FLA_DOUBLE_PTR( e );
82 double* buff_U = FLA_DOUBLE_PTR( U );
83 double* buff_V = FLA_DOUBLE_PTR( V );
84
86 m_U,
87 m_V,
88 n_GH,
96 b_alg );
97
98 break;
99 }
100
101 case FLA_COMPLEX:
102 {
103 float* buff_d = FLA_FLOAT_PTR( d );
104 float* buff_e = FLA_FLOAT_PTR( e );
109
111 m_U,
112 m_V,
113 n_GH,
115 buff_d, inc_d,
116 buff_e, inc_e,
117 buff_G, rs_G, cs_G,
118 buff_H, rs_H, cs_H,
119 buff_U, rs_U, cs_U,
120 buff_V, rs_V, cs_V,
121 b_alg );
122
123 break;
124 }
125
127 {
128 double* buff_d = FLA_DOUBLE_PTR( d );
129 double* buff_e = FLA_DOUBLE_PTR( e );
134
136 m_U,
137 m_V,
138 n_GH,
140 buff_d, inc_d,
141 buff_e, inc_e,
142 buff_G, rs_G, cs_G,
143 buff_H, rs_H, cs_H,
144 buff_U, rs_U, cs_U,
145 buff_V, rs_V, cs_V,
146 b_alg );
147
148 break;
149 }
150 }
151
152 return r_val;
153}
FLA_Error FLA_Bsvd_v_opd_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:397
FLA_Error FLA_Bsvd_v_opc_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:674
FLA_Error FLA_Bsvd_v_opz_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:912
FLA_Error FLA_Bsvd_v_ops_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:157
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
int FLA_Error
Definition FLA_type_defs.h:47

References FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_ops_var1(), FLA_Bsvd_v_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_Svd_uv_unb_var1().

◆ FLA_Bsvd_v_opt_var2()

FLA_Error FLA_Bsvd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  RG,
FLA_Obj  RH,
FLA_Obj  W,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
14{
16 FLA_Datatype datatype;
17 int m_U, m_V, n_GH;
18 int inc_d;
19 int inc_e;
20 int rs_G, cs_G;
21 int rs_H, cs_H;
22 int rs_RG, cs_RG;
23 int rs_RH, cs_RH;
24 int rs_W, cs_W;
25 int rs_U, cs_U;
26 int rs_V, cs_V;
27
28 datatype = FLA_Obj_datatype( U );
29
30 m_U = FLA_Obj_length( U );
31 m_V = FLA_Obj_length( V );
32 n_GH = FLA_Obj_width( G );
33
36
39
42
45
48
51
54
57
58
59 switch ( datatype )
60 {
61 case FLA_FLOAT:
62 {
63 float* buff_d = FLA_FLOAT_PTR( d );
64 float* buff_e = FLA_FLOAT_PTR( e );
67 float* buff_RG = FLA_FLOAT_PTR( RG );
68 float* buff_RH = FLA_FLOAT_PTR( RH );
69 float* buff_W = FLA_FLOAT_PTR( W );
70 float* buff_U = FLA_FLOAT_PTR( U );
71 float* buff_V = FLA_FLOAT_PTR( V );
72
74 m_U,
75 m_V,
76 n_GH,
87 b_alg );
88
89 break;
90 }
91
92 case FLA_DOUBLE:
93 {
94 double* buff_d = FLA_DOUBLE_PTR( d );
95 double* buff_e = FLA_DOUBLE_PTR( e );
98 double* buff_RG = FLA_DOUBLE_PTR( RG );
99 double* buff_RH = FLA_DOUBLE_PTR( RH );
100 double* buff_W = FLA_DOUBLE_PTR( W );
101 double* buff_U = FLA_DOUBLE_PTR( U );
102 double* buff_V = FLA_DOUBLE_PTR( V );
103
105 m_U,
106 m_V,
107 n_GH,
109 buff_d, inc_d,
110 buff_e, inc_e,
111 buff_G, rs_G, cs_G,
112 buff_H, rs_H, cs_H,
115 buff_W, rs_W, cs_W,
116 buff_U, rs_U, cs_U,
117 buff_V, rs_V, cs_V,
118 b_alg );
119
120 break;
121 }
122
123 case FLA_COMPLEX:
124 {
125 float* buff_d = FLA_FLOAT_PTR( d );
126 float* buff_e = FLA_FLOAT_PTR( e );
129 float* buff_RG = FLA_FLOAT_PTR( RG );
130 float* buff_RH = FLA_FLOAT_PTR( RH );
134
136 m_U,
137 m_V,
138 n_GH,
140 buff_d, inc_d,
141 buff_e, inc_e,
142 buff_G, rs_G, cs_G,
143 buff_H, rs_H, cs_H,
146 buff_W, rs_W, cs_W,
147 buff_U, rs_U, cs_U,
148 buff_V, rs_V, cs_V,
149 b_alg );
150
151 break;
152 }
153
155 {
156 double* buff_d = FLA_DOUBLE_PTR( d );
157 double* buff_e = FLA_DOUBLE_PTR( e );
160 double* buff_RG = FLA_DOUBLE_PTR( RG );
161 double* buff_RH = FLA_DOUBLE_PTR( RH );
165
167 m_U,
168 m_V,
169 n_GH,
171 buff_d, inc_d,
172 buff_e, inc_e,
173 buff_G, rs_G, cs_G,
174 buff_H, rs_H, cs_H,
177 buff_W, rs_W, cs_W,
178 buff_U, rs_U, cs_U,
179 buff_V, rs_V, cs_V,
180 b_alg );
181
182 break;
183 }
184 }
185
186 return r_val;
187}
FLA_Error FLA_Bsvd_v_opd_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:214
FLA_Error FLA_Bsvd_v_ops_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:191
FLA_Error FLA_Bsvd_v_opc_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:529
FLA_Error FLA_Bsvd_v_opz_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var2.c:550

References FLA_Bsvd_v_opc_var2(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_ops_var2(), FLA_Bsvd_v_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_Svd_uv_unb_var2().

◆ FLA_Bsvd_v_opz_var1()

FLA_Error FLA_Bsvd_v_opz_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
924{
925 dcomplex one = bl1_z1();
926 double rzero = bl1_d0();
927
928 int maxitr = 6;
929
930 double eps;
931 double tolmul;
932 double tol;
933 double thresh;
934
935 dcomplex* G;
936 dcomplex* H;
937 double* d1;
938 double* e1;
939 int r_val;
940 int done;
941 int m_GH_sweep_max;
942 int ij_begin;
943 int ijTL, ijBR;
944 int m_A11;
945 int n_iter_perf;
946 int n_UV_apply;
948 int n_deflations;
949 int n_iter_prev;
951
952 // Compute some convergence constants.
954 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
956 tolmul,
957 maxitr,
958 buff_d, inc_d,
959 buff_e, inc_e,
960 &tol,
961 &thresh );
962#ifdef PRINTF
963 printf( "FLA_Bsvd_v_opz_var1: tolmul = %12.6e\n", tolmul );
964 printf( "FLA_Bsvd_v_opz_var1: tol = %12.6e\n", tol );
965 printf( "FLA_Bsvd_v_opz_var1: thresh = %12.6e\n", thresh );
966#endif
967
968 // Initialize our completion flag.
969 done = FALSE;
970
971 // Initialize a counter that holds the maximum number of rows of G
972 // and H that we would need to initialize for the next sweep.
974
975 // Initialize a counter for the total number of iterations performed.
976 n_iter_prev = 0;
977
978 // Iterate until the matrix has completely deflated.
979 for ( total_deflations = 0; done != TRUE; )
980 {
981
982 // Initialize G and H to contain only identity rotations.
984 n_GH,
985 &one,
986 buff_G, rs_G, cs_G );
988 n_GH,
989 &one,
990 buff_H, rs_H, cs_H );
991
992 // Keep track of the maximum number of iterations performed in the
993 // current sweep. This is used when applying the sweep's Givens
994 // rotations.
996
997 // Perform a sweep: Move through the matrix and perform a bidiagonal
998 // SVD on each non-zero submatrix that is encountered. During the
999 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
1000 for ( ij_begin = 0; ij_begin < min_m_n; )
1001 {
1002
1003#ifdef PRINTF
1004 if ( ij_begin == 0 )
1005 printf( "FLA_Bsvd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
1006#endif
1007
1008 // Search for the first submatrix along the diagonal that is
1009 // bounded by zeroes (or endpoints of the matrix). If no
1010 // submatrix is found (ie: if the entire superdiagonal is zero
1011 // then FLA_FAILURE is returned. This function also inspects
1012 // superdiagonal elements for proximity to zero. If a given
1013 // element is close enough to zero, then it is deemed
1014 // converged and manually set to zero.
1016 ij_begin,
1017 buff_d, inc_d,
1018 buff_e, inc_e,
1019 &ijTL,
1020 &ijBR );
1021
1022 // Verify that a submatrix was found. If one was not found,
1023 // then we are done with the current sweep. Furthermore, if
1024 // a submatrix was not found AND we began our search at the
1025 // beginning of the matrix (ie: ij_begin == 0), then the
1026 // matrix has completely deflated and so we are done with
1027 // Francis step iteration.
1028 if ( r_val == FLA_FAILURE )
1029 {
1030 if ( ij_begin == 0 )
1031 {
1032#ifdef PRINTF
1033 printf( "FLA_Bsvd_v_opz_var1: superdiagonal is completely zero.\n" );
1034 printf( "FLA_Bsvd_v_opz_var1: Francis iteration is done!\n" );
1035#endif
1036 done = TRUE;
1037 }
1038
1039 // Break out of the current sweep so we can apply the last
1040 // remaining Givens rotations.
1041 break;
1042 }
1043
1044 // If we got this far, then:
1045 // (a) ijTL refers to the index of the first non-zero
1046 // superdiagonal along the diagonal, and
1047 // (b) ijBR refers to either:
1048 // - the first zero element that occurs after ijTL, or
1049 // - the the last diagonal element.
1050 // Note that ijTL and ijBR also correspond to the first and
1051 // last diagonal elements of the submatrix of interest. Thus,
1052 // we may compute the dimension of this submatrix as:
1053 m_A11 = ijBR - ijTL + 1;
1054
1055#ifdef PRINTF
1056 printf( "FLA_Bsvd_v_opz_var1: ij_begin = %d\n", ij_begin );
1057 printf( "FLA_Bsvd_v_opz_var1: ijTL = %d\n", ijTL );
1058 printf( "FLA_Bsvd_v_opz_var1: ijBR = %d\n", ijBR );
1059 printf( "FLA_Bsvd_v_opz_var1: m_A11 = %d\n", m_A11 );
1060#endif
1061
1062 // Adjust ij_begin, which gets us ready for the next submatrix
1063 // search in the current sweep.
1064 ij_begin = ijBR + 1;
1065
1066 // Index to the submatrices upon which we will operate.
1067 d1 = buff_d + ijTL * inc_d;
1068 e1 = buff_e + ijTL * inc_e;
1069 G = buff_G + ijTL * rs_G;
1070 H = buff_H + ijTL * rs_H;
1071
1072 // Search for a batch of singular values, recursing on deflated
1073 // subproblems whenever a split occurs. Iteration continues as
1074 // long as
1075 // (a) there is still matrix left to operate on, and
1076 // (b) the number of iterations performed in this batch is
1077 // less than n_G.
1078 // If/when either of the two above conditions fails to hold,
1079 // the function returns.
1081 n_GH,
1082 ijTL,
1083 tol,
1084 thresh,
1085 d1, inc_d,
1086 e1, inc_e,
1087 G, rs_G, cs_G,
1088 H, rs_H, cs_H,
1089 &n_iter_perf );
1090
1091 // Record the number of deflations that were observed.
1093
1094 // Update the maximum number of iterations performed in the
1095 // current sweep.
1097
1098#ifdef PRINTF
1099 printf( "FLA_Bsvd_v_opz_var1: deflations observed = %d\n", n_deflations );
1100 printf( "FLA_Bsvd_v_opz_var1: total deflations observed = %d\n", total_deflations );
1101 printf( "FLA_Bsvd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
1102#endif
1103
1104 // Store the most recent value of ijBR in m_G_sweep_max.
1105 // When the sweep is done, this value will contain the minimum
1106 // number of rows of G and H we can apply and safely include all
1107 // non-identity rotations that were computed during the
1108 // singular value searches.
1110
1111 // Make sure we haven't exceeded our maximum iteration count.
1112 if ( n_iter_prev >= n_iter_max * min_m_n )
1113 {
1114#ifdef PRINTF
1115 printf( "FLA_Bsvd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
1116#endif
1117 FLA_Abort();
1118 //return FLA_FAILURE;
1119 }
1120 }
1121
1122 // The sweep is complete. Now we must apply the Givens rotations
1123 // that were accumulated during the sweep.
1124
1125 // Recall that the number of columns of U and V to which we apply
1126 // rotations is one more than the number of rotations.
1128
1129#ifdef PRINTF
1130 printf( "FLA_Bsvd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
1131 printf( "FLA_Bsvd_v_opz_var1: m_U = %d\n", m_U );
1132 printf( "FLA_Bsvd_v_opz_var1: m_V = %d\n", m_V );
1133 printf( "FLA_Bsvd_v_opz_var1: napp= %d\n", n_UV_apply );
1134#endif
1135 // Apply the Givens rotations. Note that we only apply k sets of
1136 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
1137 // apply to n_UV_apply columns of U and V since this is the most we
1138 // need to touch given the most recent value stored to m_GH_sweep_max.
1139 //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1141 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1142 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1143 m_U,
1144 n_UV_apply,
1145 buff_G, rs_G, cs_G,
1146 buff_U, rs_U, cs_U,
1147 b_alg );
1148 //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1150 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1151 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1152 m_V,
1153 n_UV_apply,
1154 buff_H, rs_H, cs_H,
1155 buff_V, rs_V, cs_V,
1156 b_alg );
1157
1158 // Increment the total number of iterations previously performed.
1160
1161#ifdef PRINTF
1162 printf( "FLA_Bsvd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
1163#endif
1164 }
1165
1166 // Make all the singular values positive.
1167 {
1168 int i;
1169 double minus_one = bl1_dm1();
1170
1171 for ( i = 0; i < min_m_n; ++i )
1172 {
1173 if ( buff_d[ (i )*inc_d ] < rzero )
1174 {
1175 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
1176
1177 // Scale the right singular vectors.
1179 m_V,
1180 &minus_one,
1181 buff_V + (i )*cs_V, rs_V );
1182 }
1183 }
1184 }
1185
1186 return n_iter_prev;
1187}
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:186
void bl1_zdscalv(conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:61

References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opz_var2()

FLA_Error FLA_Bsvd_v_opz_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double buff_RG,
int  rs_RG,
int  cs_RG,
double buff_RH,
int  rs_RH,
int  cs_RH,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
565{
566 dcomplex one = bl1_z1();
567 double rone = bl1_d1();
568 double rzero = bl1_d0();
569
570 int maxitr = 6;
571
572 double eps;
573 double tolmul;
574 double tol;
575 double thresh;
576
577 dcomplex* G;
578 dcomplex* H;
579 double* d1;
580 double* e1;
581 int r_val;
582 int done;
583 int m_GH_sweep_max;
584 int ij_begin;
585 int ijTL, ijBR;
586 int m_A11;
587 int n_iter_perf;
588 int n_UV_apply;
590 int n_deflations;
591 int n_iter_prev;
593
594 // Compute some convergence constants.
596 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
598 tolmul,
599 maxitr,
600 buff_d, inc_d,
601 buff_e, inc_e,
602 &tol,
603 &thresh );
604
605 // Initialize our completion flag.
606 done = FALSE;
607
608 // Initialize a counter that holds the maximum number of rows of G
609 // and H that we would need to initialize for the next sweep.
611
612 // Initialize a counter for the total number of iterations performed.
613 n_iter_prev = 0;
614
615 // Initialize RG and RH to identity.
617 buff_RG, rs_RG, cs_RG );
619 buff_RH, rs_RH, cs_RH );
620
621 // Iterate until the matrix has completely deflated.
622 for ( total_deflations = 0; done != TRUE; )
623 {
624
625 // Initialize G and H to contain only identity rotations.
627 n_GH,
628 &one,
629 buff_G, rs_G, cs_G );
631 n_GH,
632 &one,
633 buff_H, rs_H, cs_H );
634
635 // Keep track of the maximum number of iterations performed in the
636 // current sweep. This is used when applying the sweep's Givens
637 // rotations.
639
640 // Perform a sweep: Move through the matrix and perform a bidiagonal
641 // SVD on each non-zero submatrix that is encountered. During the
642 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
643 for ( ij_begin = 0; ij_begin < min_m_n; )
644 {
645
646#ifdef PRINTF
647if ( ij_begin == 0 )
648printf( "FLA_Bsvd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
649#endif
650
651 // Search for the first submatrix along the diagonal that is
652 // bounded by zeroes (or endpoints of the matrix). If no
653 // submatrix is found (ie: if the entire superdiagonal is zero
654 // then FLA_FAILURE is returned. This function also inspects
655 // superdiagonal elements for proximity to zero. If a given
656 // element is close enough to zero, then it is deemed
657 // converged and manually set to zero.
659 ij_begin,
660 buff_d, inc_d,
661 buff_e, inc_e,
662 &ijTL,
663 &ijBR );
664
665 // Verify that a submatrix was found. If one was not found,
666 // then we are done with the current sweep. Furthermore, if
667 // a submatrix was not found AND we began our search at the
668 // beginning of the matrix (ie: ij_begin == 0), then the
669 // matrix has completely deflated and so we are done with
670 // Francis step iteration.
671 if ( r_val == FLA_FAILURE )
672 {
673 if ( ij_begin == 0 )
674 {
675#ifdef PRINTF
676printf( "FLA_Bsvd_v_opz_var2: superdiagonal is completely zero.\n" );
677printf( "FLA_Bsvd_v_opz_var2: Francis iteration is done!\n" );
678#endif
679 done = TRUE;
680 }
681
682 // Break out of the current sweep so we can apply the last
683 // remaining Givens rotations.
684 break;
685 }
686
687 // If we got this far, then:
688 // (a) ijTL refers to the index of the first non-zero
689 // superdiagonal along the diagonal, and
690 // (b) ijBR refers to either:
691 // - the first zero element that occurs after ijTL, or
692 // - the the last diagonal element.
693 // Note that ijTL and ijBR also correspond to the first and
694 // last diagonal elements of the submatrix of interest. Thus,
695 // we may compute the dimension of this submatrix as:
696 m_A11 = ijBR - ijTL + 1;
697
698#ifdef PRINTF
699printf( "FLA_Bsvd_v_opz_var2: ij_begin = %d\n", ij_begin );
700printf( "FLA_Bsvd_v_opz_var2: ijTL = %d\n", ijTL );
701printf( "FLA_Bsvd_v_opz_var2: ijBR = %d\n", ijBR );
702printf( "FLA_Bsvd_v_opz_var2: m_A11 = %d\n", m_A11 );
703#endif
704
705 // Adjust ij_begin, which gets us ready for the next submatrix
706 // search in the current sweep.
707 ij_begin = ijBR + 1;
708
709 // Index to the submatrices upon which we will operate.
710 d1 = buff_d + ijTL * inc_d;
711 e1 = buff_e + ijTL * inc_e;
712 G = buff_G + ijTL * rs_G;
713 H = buff_H + ijTL * rs_H;
714
715 // Search for a batch of singular values, recursing on deflated
716 // subproblems whenever possible. A new singular value search is
717 // performed as long as
718 // (a) there is still matrix left to operate on, and
719 // (b) the number of iterations performed in this batch is
720 // less than n_G.
721 // If/when either of the two above conditions fails to hold,
722 // the function returns.
724 n_GH,
725 ijTL,
726 tol,
727 thresh,
728 d1, inc_d,
729 e1, inc_e,
730 G, rs_G, cs_G,
731 H, rs_H, cs_H,
732 &n_iter_perf );
733
734 // Record the number of deflations that occurred.
736
737 // Update the maximum number of iterations performed in the
738 // current sweep.
740
741#ifdef PRINTF
742printf( "FLA_Bsvd_v_opz_var2: deflations observed = %d\n", n_deflations );
743printf( "FLA_Bsvd_v_opz_var2: total deflations observed = %d\n", total_deflations );
744printf( "FLA_Bsvd_v_opz_var2: num iterations performed = %d\n", n_iter_perf );
745#endif
746
747 // Store the most recent value of ijBR in m_G_sweep_max.
748 // When the sweep is done, this value will contain the minimum
749 // number of rows of G and H we can apply and safely include all
750 // non-identity rotations that were computed during the
751 // singular value searches.
753
754 }
755
756 // The sweep is complete. Now we must apply the Givens rotations
757 // that were accumulated during the sweep.
758
759 // Recall that the number of columns of U and V to which we apply
760 // rotations is one more than the number of rotations.
762
763
764#ifdef PRINTF
765printf( "FLA_Bsvd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
766printf( "FLA_Bsvd_v_opz_var2: m_U = %d\n", m_U );
767printf( "FLA_Bsvd_v_opz_var2: m_V = %d\n", m_V );
768printf( "FLA_Bsvd_v_opz_var2: napp= %d\n", n_UV_apply );
769#endif
770
771 // Apply the Givens rotations. Note that we only apply k sets of
772 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
773 // apply to n_UV_apply columns of U and V since this is the most we
774 // need to touch given the most recent value stored to m_GH_sweep_max.
775 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
777 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
778 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
779 min_m_n,
782 buff_G, rs_G, cs_G,
784 b_alg );
785 //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
787 //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
788 //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
789 min_m_n,
792 buff_H, rs_H, cs_H,
794 b_alg );
795
796 // Increment the total number of iterations previously performed.
798
799#ifdef PRINTF
800printf( "FLA_Bsvd_v_opz_var2: total number of iterations performed: %d\n", n_iter_prev );
801#endif
802 }
803
804 // Copy the contents of Q to temporary storage.
806 m_U,
807 m_V,
808 buff_U, rs_U, cs_U,
809 buff_W, rs_W, cs_W );
810// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
811
812 // Multiply U by R, overwriting U.
815 2*m_U,
816 m_V,
817 m_V,
818 &rone,
819 ( double* )buff_W, rs_W, 2*cs_W,
821 &rzero,
822 ( double* )buff_U, rs_U, 2*cs_U );
823
825 m_V,
826 m_V,
827 buff_V, rs_V, cs_V,
828 buff_W, rs_W, cs_W );
829
830 // Multiply V by R, overwriting V.
833 2*m_V,
834 m_V,
835 m_V,
836 &rone,
837 ( double* )buff_W, rs_W, 2*cs_W,
839 &rzero,
840 ( double* )buff_V, rs_V, 2*cs_V );
841
842 // Make all the singular values positive.
843 {
844 int i;
845 double minus_one = bl1_dm1();
846
847 for ( i = 0; i < min_m_n; ++i )
848 {
849 if ( buff_d[ (i )*inc_d ] < rzero )
850 {
851 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
852
853 // Scale the right singular vectors.
855 m_V,
856 &minus_one,
857 buff_V + (i )*cs_V, rs_V );
858 }
859 }
860 }
861
862 return n_iter_prev;
863}
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_copymt.c:286

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_z1(), bl1_zcopymt(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().