libflame revision_anchor
Functions
FLA_LU_nopiv_opt_var4.c File Reference

(r)

Functions

FLA_Error FLA_LU_nopiv_opt_var4 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var4 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var4 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var4 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var4 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_LU_nopiv_opc_var4()

FLA_Error FLA_LU_nopiv_opc_var4 ( int  m_A,
int  n_A,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
226{
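227 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
228 scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
229 int min_m_n = min( m_A, n_A );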
230 int i;
231
232 for ( i = 0; i < min_m_n; ++i )
233 {
234 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
235 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
236 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
237 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
238 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
239 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
240 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
241
242 int m_ahead = m_A - i - 1;
243 int n_ahead = n_A - i - 1;
244 int mn_behind = i;
245
246 /*------------------------------------------------------------*/
247
248 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
249 bl1_cdots( BLIS1_NO_CONJUGATE,
250 mn_behind,
251 buff_m1,
252 a10t, cs_A,
253 a01, rs_A,
254 buff_1,
255 alpha11 );
256
257 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
258 bl1_cgemv( BLIS1_TRANSPOSE,
259 BLIS1_NO_CONJUGATE,
260 mn_behind,
261 n_ahead,
262 buff_m1,
263 A02, rs_A, cs_A,
264 a10t, cs_A,
265 buff_1,
266 a12t, cs_A );
267
268 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
269 bl1_cgemv( BLIS1_NO_TRANSPOSE,
270 BLIS1_NO_CONJUGATE,
271 m_ahead,
272 mn_behind,
273 buff_m1,
274 A20, rs_A, cs_A,
275 a01, rs_A,
276 buff_1,
277 a21, rs_A );
278
279 // FLA_Inv_scal_external( alpha11, a21 );
280 bl1_cinvscalv( BLIS1_NO_CONJUGATE,
281 m_ahead,
282 alpha11,
283 a21, rs_A );
284
285 /*------------------------------------------------------------*/
286
287 }
288
289 return FLA_SUCCESS;
290}
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
int i
Definition bl1_axmyv2.c:145
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition bl1_dots.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
scomplex
Definition blis_type_defs.h:133

References bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().

◆ FLA_LU_nopiv_opd_var4()

FLA_Error FLA_LU_nopiv_opd_var4 ( int  m_A,
int  n_A,
double * buff_A,
int  rs_A,
int  cs_A 
)
155{
156 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
157 double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
158 int min_m_n = min( m_A, n_A );
159 int i;
160
161 for ( i = 0; i < min_m_n; ++i )
162 {
163 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
166 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
167 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
168 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
169 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
170
171 int m_ahead = m_A - i - 1;
172 int n_ahead = n_A - i - 1;
173 int mn_behind = i;
174
175 /*------------------------------------------------------------*/
176
177 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
178 bl1_ddots( BLIS1_NO_CONJUGATE,
179 mn_behind,
180 buff_m1,
181 a10t, cs_A,
182 a01, rs_A,
183 buff_1,
184 alpha11 );
185
186 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
187 bl1_dgemv( BLIS1_TRANSPOSE,
188 BLIS1_NO_CONJUGATE,
189 mn_behind,
190 n_ahead,
191 buff_m1,
192 A02, rs_A, cs_A,
193 a10t, cs_A,
194 buff_1,
195 a12t, cs_A );
196
197 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
198 bl1_dgemv( BLIS1_NO_TRANSPOSE,
199 BLIS1_NO_CONJUGATE,
200 m_ahead,
201 mn_behind,
202 buff_m1,
203 A20, rs_A, cs_A,
204 a01, rs_A,
205 buff_1,
206 a21, rs_A );
207
208 // FLA_Inv_scal_external( alpha11, a21 );
209 bl1_dinvscalv( BLIS1_NO_CONJUGATE,
210 m_ahead,
211 alpha11,
212 a21, rs_A );
213
214 /*------------------------------------------------------------*/
215
216 }
217
218 return FLA_SUCCESS;
219}
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition bl1_dots.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26

References bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().

◆ FLA_LU_nopiv_ops_var4()

FLA_Error FLA_LU_nopiv_ops_var4 ( int  m_A,
int  n_A,
float * buff_A,
int  rs_A,
int  cs_A 
)
84{
85 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
86 float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
87 int min_m_n = min( m_A, n_A );
88 int i;
89
90 for ( i = 0; i < min_m_n; ++i )
91 {
92 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
93 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
94 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
95 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
96 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
97 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
98 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
99
100 int m_ahead = m_A - i - 1;
101 int n_ahead = n_A - i - 1;
102 int mn_behind = i;
103
104 /*------------------------------------------------------------*/
105
106 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
107 bl1_sdots( BLIS1_NO_CONJUGATE,
108 mn_behind,
109 buff_m1,
110 a10t, cs_A,
111 a01, rs_A,
112 buff_1,
113 alpha11 );
114
115 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
116 bl1_sgemv( BLIS1_TRANSPOSE,
117 BLIS1_NO_CONJUGATE,
118 mn_behind,
119 n_ahead,
120 buff_m1,
121 A02, rs_A, cs_A,
122 a10t, cs_A,
123 buff_1,
124 a12t, cs_A );
125
126 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
127 bl1_sgemv( BLIS1_NO_TRANSPOSE,
128 BLIS1_NO_CONJUGATE,
129 m_ahead,
130 mn_behind,
131 buff_m1,
132 A20, rs_A, cs_A,
133 a01, rs_A,
134 buff_1,
135 a21, rs_A );
136
137 // FLA_Inv_scal_external( alpha11, a21 );
138 bl1_sinvscalv( BLIS1_NO_CONJUGATE,
139 m_ahead,
140 alpha11,
141 a21, rs_A );
142
143 /*------------------------------------------------------------*/
144
145 }
146
147 return FLA_SUCCESS;
148}
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition bl1_dots.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13

References bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().

◆ FLA_LU_nopiv_opt_var4()

FLA_Error FLA_LU_nopiv_opt_var4 ( FLA_Obj  A)
16{
17 FLA_Datatype datatype;
18 int m_A, n_A;
19 int rs_A, cs_A;
20
21 datatype = FLA_Obj_datatype( A );
22
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
25 rs_A = FLA_Obj_row_stride( A );
26 cs_A = FLA_Obj_col_stride( A );
27
28
29 switch ( datatype )
30 {
31 case FLA_FLOAT:
32 {
33 float* buff_A = FLA_FLOAT_PTR( A );
34
35 FLA_LU_nopiv_ops_var4( m_A,
36 n_A,
37 buff_A, rs_A, cs_A );
38
39 break;
40 }
41
42 case FLA_DOUBLE:
43 {
44 double* buff_A = FLA_DOUBLE_PTR( A );
45
46 FLA_LU_nopiv_opd_var4( m_A,
47 n_A,
48 buff_A, rs_A, cs_A );
49
50 break;
51 }
52
53 case FLA_COMPLEX:
54 {
55 scomplex* buff_A = FLA_COMPLEX_PTR( A );
56
57 FLA_LU_nopiv_opc_var4( m_A,
58 n_A,
59 buff_A, rs_A, cs_A );
60
61 break;
62 }
63
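64 case FLA_DOUBLE_COMPLEX:
65 {
66 dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
67
68 FLA_LU_nopiv_opz_var4( m_A,
69 n_A,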
70 buff_A, rs_A, cs_A );
71
72 break;
73 }
74 }
75
76 return FLA_SUCCESS;
77}
FLA_Error FLA_LU_nopiv_opc_var4(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_LU_nopiv_opt_var4.c:223
FLA_Error FLA_LU_nopiv_ops_var4(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition FLA_LU_nopiv_opt_var4.c:81
FLA_Error FLA_LU_nopiv_opd_var4(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition FLA_LU_nopiv_opt_var4.c:152
FLA_Error FLA_LU_nopiv_opz_var4(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_LU_nopiv_opt_var4.c:294
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
dcomplex
Definition blis_type_defs.h:138

References FLA_LU_nopiv_opc_var4(), FLA_LU_nopiv_opd_var4(), FLA_LU_nopiv_ops_var4(), FLA_LU_nopiv_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opz_var4()

FLA_Error FLA_LU_nopiv_opz_var4 ( int  m_A,
int  n_A,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
297{
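298 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
299 dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
300 int min_m_n = min( m_A, n_A );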
301 int i;
302
303 for ( i = 0; i < min_m_n; ++i )
304 {
305 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
306 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
307 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
308 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
309 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
310 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
311 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
312
313 int m_ahead = m_A - i - 1;
314 int n_ahead = n_A - i - 1;
315 int mn_behind = i;
316
317 /*------------------------------------------------------------*/
318
319 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
320 bl1_zdots( BLIS1_NO_CONJUGATE,
321 mn_behind,
322 buff_m1,
323 a10t, cs_A,
324 a01, rs_A,
325 buff_1,
326 alpha11 );
327
328 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
329 bl1_zgemv( BLIS1_TRANSPOSE,
330 BLIS1_NO_CONJUGATE,
331 mn_behind,
332 n_ahead,
333 buff_m1,
334 A02, rs_A, cs_A,
335 a10t, cs_A,
336 buff_1,
337 a12t, cs_A );
338
339 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
340 bl1_zgemv( BLIS1_NO_TRANSPOSE,
341 BLIS1_NO_CONJUGATE,
342 m_ahead,
343 mn_behind,
344 buff_m1,
345 A20, rs_A, cs_A,
346 a01, rs_A,
347 buff_1,
348 a21, rs_A );
349
350 // FLA_Inv_scal_external( alpha11, a21 );
351 bl1_zinvscalv( BLIS1_NO_CONJUGATE,
352 m_ahead,
353 alpha11,
354 a21, rs_A );
355
356 /*------------------------------------------------------------*/
357
358 }
359
360 return FLA_SUCCESS;
361}
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition bl1_dots.c:56
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78

References bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().