libflame revision_anchor
Functions
FLA_LU_piv_opt_var5.c File Reference

(r)

Functions

FLA_Error FLA_LU_piv_opt_var5 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var5 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var5 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var5 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var5 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 

Function Documentation

◆ FLA_LU_piv_opc_var5()

FLA_Error FLA_LU_piv_opc_var5 ( int  m_A,
int  n_A,
scomplex * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
243{
244 FLA_Error r_val = FLA_SUCCESS;
245 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
246 int min_m_n = min( m_A, n_A );
247 int i;
248
249 for ( i = 0; i < min_m_n; ++i )
250 {
251 scomplex pivot_val = czero;
252 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
253 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
254 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
255 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
256 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
257
258 int* pi1 = buff_p + i*inc_p;
259
260 int m_ahead = m_A - i - 1;
261 int n_ahead = n_A - i - 1;
262
263 /*------------------------------------------------------------*/
264
265 // FLA_Merge_2x1( alpha11,
266 // a21, &aB1 );
267
268 // FLA_Amax_external( aB1, pi1 );
269 bl1_camax( m_ahead + 1,
270 alpha11, rs_A,
271 pi1 );
272
273 // If a null pivot is encountered, return the index.
274 pivot_val = *(alpha11 + *pi1);
275 if ( pivot_val.real == czero.real &&
276 pivot_val.imag == czero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
277 else
278 {
279 // FLA_Merge_1x2( ABL, ABR, &AB );
280
281 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
282 FLA_Apply_pivots_ln_opc_var1( n_A,
283 a10t, rs_A, cs_A,
284 0,
285 0,
286 pi1, inc_p );
287
288 // FLA_Inv_scal_external( alpha11, a21 );
289 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
290 m_ahead,
291 alpha11,
292 a21, rs_A );
293 }
294
295 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
296 bl1_cger( BLIS1_NO_CONJUGATE,
297 BLIS1_NO_CONJUGATE,
298 m_ahead,
299 n_ahead,
300 buff_m1,
301 a21, rs_A,
302 a12t, cs_A,
303 A22, rs_A, cs_A );
304
305 /*------------------------------------------------------------*/
306
307 }
308
309 return r_val;
310}
FLA_Error FLA_Apply_pivots_ln_opc_var1(int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:356
const scomplex czero
Definition FLA_Init.c:38
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
int FLA_Error
Definition FLA_type_defs.h:47
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition bl1_amax.c:35
int i
Definition bl1_axmyv2.c:145
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
scomplex
Definition blis_type_defs.h:133
float imag
Definition blis_type_defs.h:134
float real
Definition blis_type_defs.h:134

References bl1_camax(), bl1_cger(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opd_var5()

FLA_Error FLA_LU_piv_opd_var5 ( int  m_A,
int  n_A,
double * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
169{
170 FLA_Error r_val = FLA_SUCCESS;
171 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
172 int min_m_n = min( m_A, n_A );
173 int i;
174
175 for ( i = 0; i < min_m_n; ++i )
176 {
177 double pivot_val = dzero;
178 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
179 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
182 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
183
184 int* pi1 = buff_p + i*inc_p;
185
186 int m_ahead = m_A - i - 1;
187 int n_ahead = n_A - i - 1;
188
189 /*------------------------------------------------------------*/
190
191 // FLA_Merge_2x1( alpha11,
192 // a21, &aB1 );
193
194 // FLA_Amax_external( aB1, pi1 );
195 bl1_damax( m_ahead + 1,
196 alpha11, rs_A,
197 pi1 );
198
199 // If a null pivot is encountered, return the index.
200 pivot_val = *(alpha11 + *pi1);
201 if ( pivot_val == dzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
202 else
203 {
204 // FLA_Merge_1x2( ABL, ABR, &AB );
205
206 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
207 FLA_Apply_pivots_ln_opd_var1( n_A,
208 a10t, rs_A, cs_A,
209 0,
210 0,
211 pi1, inc_p );
212
213 // FLA_Inv_scal_external( alpha11, a21 );
214 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
215 m_ahead,
216 alpha11,
217 a21, rs_A );
218 }
219
220 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
221 bl1_dger( BLIS1_NO_CONJUGATE,
222 BLIS1_NO_CONJUGATE,
223 m_ahead,
224 n_ahead,
225 buff_m1,
226 a21, rs_A,
227 a12t, cs_A,
228 A22, rs_A, cs_A );
229
230 /*------------------------------------------------------------*/
231
232 }
233
234 return r_val;
235}
FLA_Error FLA_Apply_pivots_ln_opd_var1(int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:274
const double dzero
Definition FLA_Init.c:37
void bl1_damax(int n, double *x, int incx, int *index)
Definition bl1_amax.c:24
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26

References bl1_damax(), bl1_dger(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_ops_var5()

FLA_Error FLA_LU_piv_ops_var5 ( int  m_A,
int  n_A,
float * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
95{
96 FLA_Error r_val = FLA_SUCCESS;
97 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
98 int min_m_n = min( m_A, n_A );
99 int i;
100
101 for ( i = 0; i < min_m_n; ++i )
102 {
103 float pivot_val = fzero;
104 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
105 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
106 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
107 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
108 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
109
110 int* pi1 = buff_p + i*inc_p;
111
112 int m_ahead = m_A - i - 1;
113 int n_ahead = n_A - i - 1;
114
115 /*------------------------------------------------------------*/
116
117 // FLA_Merge_2x1( alpha11,
118 // a21, &aB1 );
119
120 // FLA_Amax_external( aB1, pi1 );
121 bl1_samax( m_ahead + 1,
122 alpha11, rs_A,
123 pi1 );
124
125 // If a null pivot is encountered, return the index.
126 pivot_val = *(alpha11 + *pi1);
127 if ( pivot_val == fzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
128 else
129 {
130 // FLA_Merge_1x2( ABL, ABR, &AB );
131
132 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
133 FLA_Apply_pivots_ln_ops_var1( n_A,
134 a10t, rs_A, cs_A,
135 0,
136 0,
137 pi1, inc_p );
138
139 // FLA_Inv_scal_external( alpha11, a21 );
140 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
141 m_ahead,
142 alpha11,
143 a21, rs_A );
144 }
145
146 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
147 bl1_sger( BLIS1_NO_CONJUGATE,
148 BLIS1_NO_CONJUGATE,
149 m_ahead,
150 n_ahead,
151 buff_m1,
152 a21, rs_A,
153 a12t, cs_A,
154 A22, rs_A, cs_A );
155
156 /*------------------------------------------------------------*/
157
158 }
159
160 return r_val;
161}
FLA_Error FLA_Apply_pivots_ln_ops_var1(int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:192
const float fzero
Definition FLA_Init.c:36
void bl1_samax(int n, float *x, int incx, int *index)
Definition bl1_amax.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13

References bl1_samax(), bl1_sger(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opt_var5()

FLA_Error FLA_LU_piv_opt_var5 ( FLA_Obj  A,
FLA_Obj  p 
)
14{
15 FLA_Error r_val = FLA_SUCCESS;
16 FLA_Datatype datatype;
17 int m_A, n_A;
18 int rs_A, cs_A;
19 int inc_p;
20
21 datatype = FLA_Obj_datatype( A );
22
23 m_A = FLA_Obj_length( A );
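24 n_A = FLA_Obj_width( A );
25 rs_A = FLA_Obj_row_stride( A );
26 cs_A = FLA_Obj_col_stride( A );
27
28 inc_p = FLA_Obj_vector_inc( p );
29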
30
31 switch ( datatype )
32 {
33 case FLA_FLOAT:
34 {
35 float* buff_A = FLA_FLOAT_PTR( A );
36 int* buff_p = FLA_INT_PTR( p );
37
38 r_val = FLA_LU_piv_ops_var5( m_A,
39 n_A,
40 buff_A, rs_A, cs_A,
41 buff_p, inc_p );
42
43 break;
44 }
45
46 case FLA_DOUBLE:
47 {
48 double* buff_A = FLA_DOUBLE_PTR( A );
49 int* buff_p = FLA_INT_PTR( p );
50
51 r_val = FLA_LU_piv_opd_var5( m_A,
52 n_A,
53 buff_A, rs_A, cs_A,
54 buff_p, inc_p );
55
56 break;
57 }
58
59 case FLA_COMPLEX:
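60 {
61 scomplex* buff_A = FLA_COMPLEX_PTR( A );
62 int* buff_p = FLA_INT_PTR( p );
63
64 r_val = FLA_LU_piv_opc_var5( m_A,
65 n_A,
66 buff_A, rs_A, cs_A,
67 buff_p, inc_p );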
68
69 break;
70 }
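71
72 case FLA_DOUBLE_COMPLEX:
73 {
74 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
75 int* buff_p = FLA_INT_PTR( p );
76
77 r_val = FLA_LU_piv_opz_var5( m_A,
78 n_A,
79 buff_A, rs_A, cs_A,
80 buff_p, inc_p );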
81
82 break;
83 }
84 }
85
86 return r_val;
87}
FLA_Error FLA_LU_piv_opz_var5(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:314
FLA_Error FLA_LU_piv_opc_var5(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:239
FLA_Error FLA_LU_piv_ops_var5(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:91
FLA_Error FLA_LU_piv_opd_var5(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:165
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
dcomplex
Definition blis_type_defs.h:138

References FLA_LU_piv_opc_var5(), FLA_LU_piv_opd_var5(), FLA_LU_piv_ops_var5(), FLA_LU_piv_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opz_var5()

FLA_Error FLA_LU_piv_opz_var5 ( int  m_A,
int  n_A,
dcomplex * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
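318{
319 FLA_Error r_val = FLA_SUCCESS;
320 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );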
321 int min_m_n = min( m_A, n_A );
322 int i;
323
324 for ( i = 0; i < min_m_n; ++i )
325 {
326 dcomplex pivot_val = zzero;
327 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
328 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
329 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
330 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
331 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
332
333 int* pi1 = buff_p + i*inc_p;
334
335 int m_ahead = m_A - i - 1;
336 int n_ahead = n_A - i - 1;
337
338 /*------------------------------------------------------------*/
339
340 // FLA_Merge_2x1( alpha11,
341 // a21, &aB1 );
342
343 // FLA_Amax_external( aB1, pi1 );
344 bl1_zamax( m_ahead + 1,
345 alpha11, rs_A,
346 pi1 );
347
348 // If a null pivot is encountered, return the index.
349 pivot_val = *(alpha11 + *pi1);
350 if ( pivot_val.real == zzero.real &&
351 pivot_val.imag == zzero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
352 else
353 {
354 // FLA_Merge_1x2( ABL, ABR, &AB );
355
356 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
357 FLA_Apply_pivots_ln_opz_var1( n_A,
358 a10t, rs_A, cs_A,
359 0,
360 0,
361 pi1, inc_p );
362
363 // FLA_Inv_scal_external( alpha11, a21 );
364 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
365 m_ahead,
366 alpha11,
367 a21, rs_A );
368 }
369 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
370 bl1_zger( BLIS1_NO_CONJUGATE,
371 BLIS1_NO_CONJUGATE,
372 m_ahead,
373 n_ahead,
374 buff_m1,
375 a21, rs_A,
376 a12t, cs_A,
377 A22, rs_A, cs_A );
378
379 /*------------------------------------------------------------*/
380
381 }
382
383 return r_val;
384}
FLA_Error FLA_Apply_pivots_ln_opz_var1(int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:438
const dcomplex zzero
Definition FLA_Init.c:39
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition bl1_amax.c:46
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78
double real
Definition blis_type_defs.h:139
double imag
Definition blis_type_defs.h:139

References bl1_zamax(), bl1_zger(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var5().