libflame revision_anchor
Functions
FLA_Apply_G_rf_asm_var9b.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var9b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_asc_var9b()

FLA_Error FLA_Apply_G_rf_asc_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
scomplex * buff_A,
int  rs_A,
int  cs_A 
)
422{
424
425 return FLA_SUCCESS;
426}
int i
Definition bl1_axmyv2.c:145

References i.

Referenced by FLA_Apply_G_rf_asm_var9b().

◆ FLA_Apply_G_rf_asd_var9b()

FLA_Error FLA_Apply_G_rf_asd_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
double * buff_A,
int  rs_A,
int  cs_A 
)
138{
139 double one = bl1_d1();
140 double zero = bl1_d0();
141 double gamma12;
142 double sigma12;
143 double gamma23;
144 double sigma23;
145 double* a1;
146 double* a2;
147 double* a3;
148 dcomplex* g12;
149 dcomplex* g23;
150 int i, j, g, k;
151 int nG, nG_app;
152 int n_iter;
153 int n_left;
154 int k_minus_1;
155 int n_fuse;
157 int m_app;
158
159
160 k_minus_1 = k_G - 1;
161 nG = n_A - 1;
162 n_fuse = 2;
163
164 // Use the simple variant for nG < 2*(k - 1) or k == 1.
165 if ( nG < 2*k_minus_1 || k_G == 1 )
166 {
167 FLA_Apply_G_rf_asd_var1( k_G,
168 m_A,
169 n_A,
170 buff_G, rs_G, cs_G,
171 buff_A, rs_A, cs_A );
172 return FLA_SUCCESS;
173 }
174
175
176 // Start-up phase.
177
178 for ( j = -1; j < k_minus_1; j += n_fuse )
179 {
180 nG_app = j + 1;
181 n_iter = nG_app;
182 n_left = 1;
183
184 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
185 {
186 g12 = buff_G + (g )*rs_G + (k )*cs_G;
187 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
188 a1 = buff_A + (g )*cs_A;
189 a2 = buff_A + (g + 1)*cs_A;
190 a3 = buff_A + (g + 2)*cs_A;
191
192 gamma12 = g12->real;
193 sigma12 = g12->imag;
194 gamma23 = g23->real;
195 sigma23 = g23->imag;
196
197 is_ident12 = ( gamma12 == one && sigma12 == zero );
198 is_ident23 = ( gamma23 == one && sigma23 == zero );
199
200 m_app = min( i_k + 3 + j - iTL, m_A );
201 m_app = max( m_app, 0 );
202
203 if ( !is_ident12 && is_ident23 )
204 {
205 // Apply only to columns 1 and 2.
206
208 &gamma12,
209 &sigma12,
210 a1, 1,
211 a2, 1 );
212 }
213 else if ( is_ident12 && !is_ident23 )
214 {
215 // Apply only to columns 2 and 3.
216
218 &gamma23,
219 &sigma23,
220 a2, 1,
221 a3, 1 );
222 }
223 else if ( !is_ident12 && !is_ident23 )
224 {
225 // Apply to all three columns.
226
228 &gamma12,
229 &sigma12,
230 &gamma23,
231 &sigma23,
232 a1, 1,
233 a2, 1,
234 a3, 1 );
235 }
236 }
237
238 if ( n_left == 1 )
239 {
240 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
241 a2 = buff_A + (g + 1)*cs_A;
242 a3 = buff_A + (g + 2)*cs_A;
243
244 gamma23 = g23->real;
245 sigma23 = g23->imag;
246
247 is_ident23 = ( gamma23 == one && sigma23 == zero );
248
249 m_app = min( i_k + 3 + j - iTL, m_A );
250 m_app = max( m_app, 0 );
251
252 if ( !is_ident23 )
254 &gamma23,
255 &sigma23,
256 a2, 1,
257 a3, 1 );
258 }
259 }
260
261 // Pipeline stage
262
263 for ( ; j < nG - 1; j += n_fuse )
264 {
265 nG_app = k_G;
266 n_iter = nG_app;
267 n_left = 0;
268
269 for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
270 {
271 g12 = buff_G + (g )*rs_G + (k )*cs_G;
272 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
273 a1 = buff_A + (g )*cs_A;
274 a2 = buff_A + (g + 1)*cs_A;
275 a3 = buff_A + (g + 2)*cs_A;
276
277 gamma12 = g12->real;
278 sigma12 = g12->imag;
279 gamma23 = g23->real;
280 sigma23 = g23->imag;
281
282 is_ident12 = ( gamma12 == one && sigma12 == zero );
283 is_ident23 = ( gamma23 == one && sigma23 == zero );
284
285 m_app = min( i_k + 3 + j - iTL, m_A );
286 m_app = max( m_app, 0 );
287
288 if ( !is_ident12 && is_ident23 )
289 {
290 // Apply only to columns 1 and 2.
291
293 &gamma12,
294 &sigma12,
295 a1, 1,
296 a2, 1 );
297 }
298 else if ( is_ident12 && !is_ident23 )
299 {
300 // Apply only to columns 2 and 3.
301
303 &gamma23,
304 &sigma23,
305 a2, 1,
306 a3, 1 );
307 }
308 else if ( !is_ident12 && !is_ident23 )
309 {
310 // Apply to all three columns.
311
313 &gamma12,
314 &sigma12,
315 &gamma23,
316 &sigma23,
317 a1, 1,
318 a2, 1,
319 a3, 1 );
320 }
321 }
322 }
323
324 // Shutdown stage
325
326 for ( j = nG % n_fuse; j < k_G; j += n_fuse )
327 {
328 g = nG - 1;
329 k = j;
330
331 n_left = 1;
332 if ( n_left == 1 )
333 {
334 g12 = buff_G + (g )*rs_G + (k )*cs_G;
335 a1 = buff_A + (g )*cs_A;
336 a2 = buff_A + (g + 1)*cs_A;
337
338 gamma12 = g12->real;
339 sigma12 = g12->imag;
340
341 is_ident12 = ( gamma12 == one && sigma12 == zero );
342
343 m_app = m_A;
344
345 if ( !is_ident12 )
347 &gamma12,
348 &sigma12,
349 a1, 1,
350 a2, 1 );
351 ++k;
352 --g;
353 }
354
355 nG_app = k_minus_1 - j;
356 n_iter = nG_app;
357
358 for ( i = 0; i < n_iter; ++i, ++k, --g )
359 {
360 g12 = buff_G + (g )*rs_G + (k )*cs_G;
361 g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362 a1 = buff_A + (g )*cs_A;
363 a2 = buff_A + (g + 1)*cs_A;
364 a3 = buff_A + (g + 2)*cs_A;
365
366 gamma12 = g12->real;
367 sigma12 = g12->imag;
368 gamma23 = g23->real;
369 sigma23 = g23->imag;
370
371 is_ident12 = ( gamma12 == one && sigma12 == zero );
372 is_ident23 = ( gamma23 == one && sigma23 == zero );
373
374 m_app = m_A;
375
376 if ( !is_ident12 && is_ident23 )
377 {
378 // Apply only to columns 1 and 2.
379
381 &gamma12,
382 &sigma12,
383 a1, 1,
384 a2, 1 );
385 }
386 else if ( is_ident12 && !is_ident23 )
387 {
388 // Apply only to columns 2 and 3.
389
391 &gamma23,
392 &sigma23,
393 a2, 1,
394 a3, 1 );
395 }
396 else if ( !is_ident12 && !is_ident23 )
397 {
398 // Apply to all three columns.
399
401 &gamma12,
402 &sigma12,
403 &gamma23,
404 &sigma23,
405 a1, 1,
406 a2, 1,
407 a3, 1 );
408 }
409 }
410 }
411
412 return FLA_SUCCESS;
413}
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var1.c:164
int n_left
Definition bl1_axmyv2.c:149
double bl1_d0(void)
Definition bl1_constants.c:118
double bl1_d1(void)
Definition bl1_constants.c:54
Definition blis_type_defs.h:138

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, and n_left.

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bld_var9b().

◆ FLA_Apply_G_rf_asm_var9b()

FLA_Error FLA_Apply_G_rf_asm_var9b ( FLA_Obj  G,
FLA_Obj  A 
)
29{
30 FLA_Datatype datatype;
31 int k_G, m_A, n_A;
32 int rs_G, cs_G;
33 int rs_A, cs_A;
34
35 datatype = FLA_Obj_datatype( A );
36
37 k_G = FLA_Obj_width( G );
38 m_A = FLA_Obj_length( A );
39 n_A = FLA_Obj_width( A );
40
43
46
47 switch ( datatype )
48 {
49 case FLA_FLOAT:
50 {
52 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53
55 m_A,
56 n_A,
57 0,
58 0,
60 buff_A, rs_A, cs_A );
61
62 break;
63 }
64
65 case FLA_DOUBLE:
66 {
68 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69
71 m_A,
72 n_A,
73 0,
74 0,
76 buff_A, rs_A, cs_A );
77
78 break;
79 }
80
81 case FLA_COMPLEX:
82 {
85
87 m_A,
88 n_A,
89 0,
90 0,
92 buff_A, rs_A, cs_A );
93
94 break;
95 }
96
98 {
101
103 m_A,
104 n_A,
105 0,
106 0,
107 buff_G, rs_G, cs_G,
108 buff_A, rs_A, cs_A );
109
110 break;
111 }
112 }
113
114 return FLA_SUCCESS;
115}
FLA_Error FLA_Apply_G_rf_asc_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:415
FLA_Error FLA_Apply_G_rf_ass_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:118
FLA_Error FLA_Apply_G_rf_asd_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:131
FLA_Error FLA_Apply_G_rf_asz_var9b(int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition FLA_Apply_G_rf_asm_var9b.c:428
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:133

References FLA_Apply_G_rf_asc_var9b(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_ass_var9b(), FLA_Apply_G_rf_asz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

◆ FLA_Apply_G_rf_ass_var9b()

FLA_Error FLA_Apply_G_rf_ass_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex * buff_G,
int  rs_G,
int  cs_G,
float * buff_A,
int  rs_A,
int  cs_A 
)
125{
127
128 return FLA_SUCCESS;
129}

References i.

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bls_var9b().

◆ FLA_Apply_G_rf_asz_var9b()

FLA_Error FLA_Apply_G_rf_asz_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex * buff_G,
int  rs_G,
int  cs_G,
dcomplex * buff_A,
int  rs_A,
int  cs_A 
)
435{
437
438 return FLA_SUCCESS;
439}

References i.

Referenced by FLA_Apply_G_rf_asm_var9b().