libflame revision_anchor
Functions
FLA_SA_LU_unb.c File Reference

(r)

Functions

FLA_Error FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L)
 

Function Documentation

◆ FLA_SA_LU_unb()

FLA_Error FLA_SA_LU_unb ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L 
)
14{
15 FLA_Datatype datatype;
16 int m_U, cs_U;
17 int m_D, cs_D;
18 int cs_L;
19 // int rs_U;
20 int rs_D;
21 // int rs_L;
23 int j, ipiv;
24 int* buff_p;
25
26 if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS;
27
28 datatype = FLA_Obj_datatype( U );
29
30 m_U = FLA_Obj_length( U );
31 // rs_U = FLA_Obj_row_stride( U );
33
34 m_D = FLA_Obj_length( D );
37
38 // rs_L = FLA_Obj_row_stride( L );
40
43
44 buff_p = ( int * ) FLA_INT_PTR( p );
45
46 switch ( datatype ){
47
48 case FLA_FLOAT:
49 {
50 float* buff_U = ( float * ) FLA_FLOAT_PTR( U );
51 float* buff_D = ( float * ) FLA_FLOAT_PTR( D );
52 float* buff_L = ( float * ) FLA_FLOAT_PTR( L );
53 float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE );
54 float L_tmp;
55 float D_tmp;
56 float d_inv_Ljj;
57
58 for ( j = 0; j < m_U; ++j )
59 {
60 bl1_samax( m_D,
61 buff_D + j*cs_D + 0*rs_D,
62 rs_D,
63 &ipiv );
64
65 L_tmp = buff_L[ j*cs_L + j ];
66 D_tmp = buff_D[ j*cs_D + ipiv ];
67
68 if ( fabsf( L_tmp ) < fabsf( D_tmp ) )
69 {
71 buff_L + 0*cs_L + j, cs_L,
72 buff_D + 0*cs_D + ipiv, cs_D );
73
74 buff_p[ j ] = ipiv + m_U - j;
75 }
76 else
77 {
78 buff_p[ j ] = 0;
79 }
80
81 d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ];
82
84 &d_inv_Ljj,
85 buff_D + j*cs_D + 0, rs_D );
86
87 m_U_min_j_min_1 = m_U - j - 1;
88
89 if ( m_U_min_j_min_1 > 0 )
90 {
93 m_D,
96 buff_D + (j+0)*cs_D + 0, rs_D,
97 buff_L + (j+1)*cs_L + j, cs_L,
98 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
99 }
100
101 m_U_min_j = m_U - j;
102
103 if ( m_U_min_j > 0 )
104 {
106 buff_L + j*cs_L + j, cs_L,
107 buff_U + j*cs_U + j, cs_U );
108 }
109 }
110 break;
111 }
112
113 case FLA_DOUBLE:
114 {
115 double* buff_U = ( double * ) FLA_DOUBLE_PTR( U );
116 double* buff_D = ( double * ) FLA_DOUBLE_PTR( D );
117 double* buff_L = ( double * ) FLA_DOUBLE_PTR( L );
118 double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE );
119 double L_tmp;
120 double D_tmp;
121 double d_inv_Ljj;
122
123 for ( j = 0; j < m_U; ++j )
124 {
125 bl1_damax( m_D,
126 buff_D + j*cs_D + 0*rs_D,
127 rs_D,
128 &ipiv );
129
130 L_tmp = buff_L[ j*cs_L + j ];
131 D_tmp = buff_D[ j*cs_D + ipiv ];
132
133 if ( fabs( L_tmp ) < fabs( D_tmp ) )
134 {
135 bl1_dswap( m_U,
136 buff_L + 0*cs_L + j, cs_L,
137 buff_D + 0*cs_D + ipiv, cs_D );
138
139 buff_p[ j ] = ipiv + m_U - j;
140 }
141 else
142 {
143 buff_p[ j ] = 0;
144 }
145
146 d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ];
147
148 bl1_dscal( m_D,
149 &d_inv_Ljj,
150 buff_D + j*cs_D + 0, rs_D );
151
152 m_U_min_j_min_1 = m_U - j - 1;
153
154 if ( m_U_min_j_min_1 > 0 )
155 {
158 m_D,
161 buff_D + (j+0)*cs_D + 0, rs_D,
162 buff_L + (j+1)*cs_L + j, cs_L,
163 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
164 }
165
166 m_U_min_j = m_U - j;
167
168 if ( m_U_min_j > 0 )
169 {
171 buff_L + j*cs_L + j, cs_L,
172 buff_U + j*cs_U + j, cs_U );
173 }
174 }
175 break;
176 }
177
178 case FLA_COMPLEX:
179 {
188 float temp;
189
190 for ( j = 0; j < m_U; ++j )
191 {
192 bl1_camax( m_D,
193 buff_D + j*cs_D + 0*rs_D,
194 rs_D,
195 &ipiv );
196
197 L_tmp = buff_L[ j*cs_L + j ];
198 D_tmp = buff_D[ j*cs_D + ipiv ];
199
200 if ( fabsf( L_tmp.real + L_tmp.imag ) < fabsf( D_tmp.real + D_tmp.imag ) )
201 {
202 bl1_cswap( m_U,
203 buff_L + 0*cs_L + j, cs_L,
204 buff_D + 0*cs_D + ipiv, cs_D );
205
206 buff_p[ j ] = ipiv + m_U - j;
207 }
208 else
209 {
210 buff_p[ j ] = 0;
211 }
212
213 Ljj = buff_L[ j*cs_L + j ];
214
215 // d_inv_Ljj = 1.0 / Ljj
216 temp = 1.0F / ( Ljj.real * Ljj.real +
217 Ljj.imag * Ljj.imag );
218 d_inv_Ljj.real = Ljj.real * temp;
219 d_inv_Ljj.imag = Ljj.imag * -temp;
220
221 bl1_cscal( m_D,
222 &d_inv_Ljj,
223 buff_D + j*cs_D + 0, rs_D );
224
225 m_U_min_j_min_1 = m_U - j - 1;
226
227 if ( m_U_min_j_min_1 > 0 )
228 {
231 m_D,
234 buff_D + (j+0)*cs_D + 0, rs_D,
235 buff_L + (j+1)*cs_L + j, cs_L,
236 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
237 }
238
239 m_U_min_j = m_U - j;
240
241 if ( m_U_min_j > 0 )
242 {
244 buff_L + j*cs_L + j, cs_L,
245 buff_U + j*cs_U + j, cs_U );
246 }
247 }
248 break;
249 }
250
252 {
261 double temp;
262
263 for ( j = 0; j < m_U; ++j )
264 {
265 bl1_zamax( m_D,
266 buff_D + j*cs_D + 0*rs_D,
267 rs_D,
268 &ipiv );
269
270 L_tmp = buff_L[ j*cs_L + j ];
271 D_tmp = buff_D[ j*cs_D + ipiv ];
272
273 if ( fabs( L_tmp.real + L_tmp.imag ) < fabs( D_tmp.real + D_tmp.imag ) )
274 {
275 bl1_zswap( m_U,
276 buff_L + 0*cs_L + j, cs_L,
277 buff_D + 0*cs_D + ipiv, cs_D );
278
279 buff_p[ j ] = ipiv + m_U - j;
280 }
281 else
282 {
283 buff_p[ j ] = 0;
284 }
285
286 Ljj = buff_L[ j*cs_L + j ];
287
288 // d_inv_Ljj = 1.0 / Ljj
289 temp = 1.0 / ( Ljj.real * Ljj.real +
290 Ljj.imag * Ljj.imag );
291 d_inv_Ljj.real = Ljj.real * temp;
292 d_inv_Ljj.imag = Ljj.imag * -temp;
293
294 bl1_zscal( m_D,
295 &d_inv_Ljj,
296 buff_D + j*cs_D + 0, rs_D );
297
298 m_U_min_j_min_1 = m_U - j - 1;
299
300 if ( m_U_min_j_min_1 > 0 )
301 {
304 m_D,
307 buff_D + (j+0)*cs_D + 0, rs_D,
308 buff_L + (j+1)*cs_L + j, cs_L,
309 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
310 }
311
312 m_U_min_j = m_U - j;
313
314 if ( m_U_min_j > 0 )
315 {
317 buff_L + j*cs_L + j, cs_L,
318 buff_U + j*cs_U + j, cs_U );
319 }
320 }
321 break;
322 }
323
324 }
325
326 return FLA_SUCCESS;
327}
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Bool FLA_Obj_has_zero_dim(FLA_Obj A)
Definition FLA_Query.c:400
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition FLA_Triangularize.c:13
void bl1_samax(int n, float *x, int incx, int *index)
Definition bl1_amax.c:13
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition bl1_amax.c:46
void bl1_damax(int n, double *x, int incx, int *index)
Definition bl1_amax.c:24
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition bl1_amax.c:35
int i
Definition bl1_axmyv2.c:145
dcomplex temp
Definition bl1_axpyv2b.c:301
void bl1_zcopy(int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copy.c:52
void bl1_dcopy(int m, double *x, int incx, double *y, int incy)
Definition bl1_copy.c:26
void bl1_ccopy(int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copy.c:39
void bl1_scopy(int m, float *x, int incx, float *y, int incy)
Definition bl1_copy.c:13
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_dscal(int n, double *alpha, double *x, int incx)
Definition bl1_scal.c:26
void bl1_zscal(int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scal.c:78
void bl1_cscal(int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scal.c:52
void bl1_sscal(int n, float *alpha, float *x, int incx)
Definition bl1_scal.c:13
void bl1_zswap(int n, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_swap.c:52
void bl1_dswap(int n, double *x, int incx, double *y, int incy)
Definition bl1_swap.c:26
void bl1_cswap(int n, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_swap.c:39
void bl1_sswap(int n, float *x, int incx, float *y, int incy)
Definition bl1_swap.c:13
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:138
double real
Definition blis_type_defs.h:139
double imag
Definition blis_type_defs.h:139
Definition blis_type_defs.h:133

References bl1_camax(), bl1_ccopy(), bl1_cger(), bl1_cscal(), bl1_cswap(), bl1_damax(), bl1_dcopy(), bl1_dger(), bl1_dscal(), bl1_dswap(), bl1_samax(), bl1_scopy(), bl1_sger(), bl1_sscal(), bl1_sswap(), bl1_zamax(), bl1_zcopy(), bl1_zger(), bl1_zscal(), bl1_zswap(), BLIS1_NO_CONJUGATE, FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Triangularize(), i, dcomplex::imag, dcomplex::real, and temp.

Referenced by FLA_SA_LU_blk().