libflame revision_anchor
blis_macro_defs.h
Go to the documentation of this file.
1/*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9*/
10
11#ifndef BLIS1_MACRO_DEFS_H
12#define BLIS1_MACRO_DEFS_H
13
// --- Constants ---------------------------------------------------------------

// Selector values for the intrinsics flavour. The names suggest "none" and
// "SSE"; where they are consumed is not visible in this header.
#define BLIS1_NO_INTRINSICS   0
#define BLIS1_SSE_INTRINSICS  3
18
// --- boolean ---

// Pin FALSE/TRUE to 0/1, discarding any definition that was active before
// this header was included.
#ifdef FALSE
#undef FALSE
#endif
#define FALSE 0

#ifdef TRUE
#undef TRUE
#endif
#define TRUE 1
26
27/*
28// --- trans ---
29
30#define BLIS1_NO_TRANSPOSE 'n'
31#define BLIS1_TRANSPOSE 't'
32#define BLIS1_CONJ_NO_TRANSPOSE 'c'
33#define BLIS1_CONJ_TRANSPOSE 'h'
34
35// --- conj ---
36
37#define BLIS1_NO_CONJUGATE 'n'
38#define BLIS1_CONJUGATE 'c'
39
40// --- uplo ---
41
42#define BLIS1_LOWER_TRIANGULAR 'l'
43#define BLIS1_UPPER_TRIANGULAR 'u'
44
45// --- side ---
46
47#define BLIS1_LEFT 'l'
48#define BLIS1_RIGHT 'r'
49
50// --- diag ---
51
52#define BLIS1_NONUNIT_DIAG 'n'
53#define BLIS1_UNIT_DIAG 'u'
54#define BLIS1_ZERO_DIAG 'z'
55*/
56
57// --- Functional macros -------------------------------------------------------
58
// --- Type-agnostic ---

// All macros in this group are plain expressions that work on any arithmetic
// type. Each argument may be evaluated more than once, so do not pass
// expressions with side effects.

// min, max, abs

#define bl1_min( a, b )  ( (a) < (b) ? (a) : (b) )
#define bl1_max( a, b )  ( (a) > (b) ? (a) : (b) )
#define bl1_abs( a )     ( (a) <= 0 ? -(a) : (a) )

// fmin, fmax, fabs
// (fmin/fmax simply forward to min/max; fabs compares against 0.0.)

#define bl1_fmin( a, b )  bl1_min( a, b )
#define bl1_fmax( a, b )  bl1_max( a, b )
#define bl1_fabs( a )     ( (a) <= 0.0 ? -(a) : (a) )

// fminabs, fmaxabs
// Smaller / larger of the two absolute values.

#define bl1_fminabs( a, b )  bl1_fmin( bl1_fabs( a ), bl1_fabs( b ) )

#define bl1_fmaxabs( a, b )  bl1_fmax( bl1_fabs( a ), bl1_fabs( b ) )
83
84// --- Type-dependent ---
85
// --- neg1 ---

// In-place negation: *x = -(*x).
// The real variants are a single statement and keep their trailing ';'.
// The complex variants update two fields and are therefore wrapped in braces,
// so both updates stay together when the macro is the body of an unbraced
// if/for/while.

// void bl1_sneg1( float* x );
#define bl1_sneg1( x ) \
*(x) *= -1.0F;

// void bl1_dneg1( double* x );
#define bl1_dneg1( x ) \
*(x) *= -1.0;

// void bl1_cneg1( scomplex* x );
#define bl1_cneg1( x ) \
{ \
    (x)->real *= -1.0F; \
    (x)->imag *= -1.0F; \
}

// void bl1_zneg1( dcomplex* x );
#define bl1_zneg1( x ) \
{ \
    (x)->real *= -1.0; \
    (x)->imag *= -1.0; \
}
105
// --- neg2 ---

// Out-of-place negation: *y = -(*x). x is only read.
// The complex variants are wrapped in braces so that both component
// assignments stay together under an unbraced if/for/while.

// void bl1_sneg2( float* x, float* y );
#define bl1_sneg2( x, y ) \
*(y) = -1.0F * *(x);

// void bl1_dneg2( double* x, double* y );
#define bl1_dneg2( x, y ) \
*(y) = -1.0 * *(x);

// void bl1_cneg2( scomplex* x, scomplex* y );
#define bl1_cneg2( x, y ) \
{ \
    (y)->real = -1.0F * (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

// void bl1_zneg2( dcomplex* x, dcomplex* y );
#define bl1_zneg2( x, y ) \
{ \
    (y)->real = -1.0 * (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}
125
126// --- sqrte ---
127
128// void bl1_ssqrte( float* alpha, int* error );
129#define bl1_ssqrte( alpha, error ) \
130if ( *(alpha) <= 0.0F || isnan( *(alpha) ) ) { *(error) = FLA_FAILURE; } \
131else { *(alpha) = ( float ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; }
132
133// void bl1_dsqrte( double* alpha, int* error );
134#define bl1_dsqrte( alpha, error ) \
135if ( *(alpha) <= 0.0 || isnan( *(alpha) ) ) { *(error) = FLA_FAILURE; } \
136else { *(alpha) = ( double ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; }
137
138// void bl1_csqrte( scomplex* alpha, int* error );
139#define bl1_csqrte( alpha, error ) \
140if ( (alpha)->real <= 0.0F || isnan( (alpha)->real) ) \
141{ *(error) = FLA_FAILURE; } \
142else { \
143(alpha)->real = ( float ) sqrt( (alpha)->real ); \
144(alpha)->imag = 0.0F; *(error) = FLA_SUCCESS; }
145
146// void bl1_zsqrte( dcomplex* alpha, int* error );
147#define bl1_zsqrte( alpha, error ) \
148if ( (alpha)->real <= 0.0 || isnan( (alpha)->real) ) \
149{ *(error) = FLA_FAILURE; } \
150else { \
151(alpha)->real = ( double ) sqrt( (alpha)->real ); \
152(alpha)->imag = 0.0; *(error) = FLA_SUCCESS; }
153
154// --- absval2 ---
155
156// void bl1_sabsval2( float* alpha, float* absval );
157#define bl1_sabsval2( alpha, absval ) \
158*(absval) = ( float ) fabs( ( double ) *(alpha) );
159
160// void bl1_dabsval2( double* alpha, double* absval );
161#define bl1_dabsval2( alpha, absval ) \
162*(absval) = fabs( *(alpha) );
163
164// void bl1_cabsval2( scomplex* x, scomplex* a );
165#define bl1_cabsval2( x, a ) \
166{ \
167 float s = bl1_fmaxabs( (x)->real, (x)->imag ); \
168 float mag = sqrtf( s ) * \
169 sqrtf( ( (x)->real / s ) * (x)->real + \
170 ( (x)->imag / s ) * (x)->imag ); \
171 (a)->real = mag; \
172 (a)->imag = 0.0F; \
173}
174
175// void bl1_csabsval2( scomplex* x, float* a );
176#define bl1_csabsval2( x, a ) \
177{ \
178 float s = bl1_fmaxabs( (x)->real, (x)->imag ); \
179 float mag = sqrtf( s ) * \
180 sqrtf( ( (x)->real / s ) * (x)->real + \
181 ( (x)->imag / s ) * (x)->imag ); \
182 *(a) = mag; \
183}
184
185// void bl1_zabsval2( dcomplex* x, dcomplex* a );
186#define bl1_zabsval2( x, a ) \
187{ \
188 double s = bl1_fmaxabs( (x)->real, (x)->imag ); \
189 double mag = sqrt( s ) * \
190 sqrt( ( (x)->real / s ) * (x)->real + \
191 ( (x)->imag / s ) * (x)->imag ); \
192 (a)->real = mag; \
193 (a)->imag = 0.0; \
194}
195
196// void bl1_zdabsval2( dcomplex* x, double* a );
197#define bl1_zdabsval2( x, a ) \
198{ \
199 double s = bl1_fmaxabs( (x)->real, (x)->imag ); \
200 double mag = sqrt( s ) * \
201 sqrt( ( (x)->real / s ) * (x)->real + \
202 ( (x)->imag / s ) * (x)->imag ); \
203 *(a) = mag; \
204}
205
206
// --- absqr ---

// In-place squared magnitude.
// Real variants: *alpha = (*alpha)^2.
// Complex variants: real = real^2 + imag^2, imag = 0. The two assignments are
// wrapped in braces so they stay together under an unbraced if/for/while.

// void bl1_sabsqr( float* alpha );
#define bl1_sabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);

// void bl1_dabsqr( double* alpha );
#define bl1_dabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);

// void bl1_cabsqr( scomplex* alpha );
#define bl1_cabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0F; \
}

// void bl1_zabsqr( dcomplex* alpha );
#define bl1_zabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0; \
}
226
// --- invscals ---

// In-place division of y by the scalar a: *y = *y / *a.
// For a complex divisor the quotient is formed as y * conj(a) / |a|^2, with
// a pre-divided by max(|a.real|, |a.imag|) before the products are taken.

// void bl1_sinvscals( float* a, float* y );
#define bl1_sinvscals( a, y )  *(y) = *(y) / *(a);

// void bl1_dinvscals( double* a, double* y );
#define bl1_dinvscals( a, y )  *(y) = *(y) / *(a);

// void bl1_csinvscals( float* a, scomplex* y );
#define bl1_csinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

// void bl1_cinvscals( scomplex* a, scomplex* y );
#define bl1_cinvscals( a, y ) \
{ \
    float bl1_is_scale_ = bl1_fmaxabs( (a)->real, (a)->imag ); \
    float bl1_is_ar_    = (a)->real / bl1_is_scale_; \
    float bl1_is_ai_    = (a)->imag / bl1_is_scale_; \
    float bl1_is_yr_    = (y)->real; \
    float bl1_is_den_   = ( bl1_is_ar_ * (a)->real + bl1_is_ai_ * (a)->imag ); \
    (y)->real = ( (bl1_is_yr_) * bl1_is_ar_ + (y)->imag * bl1_is_ai_ ) / bl1_is_den_; \
    (y)->imag = ( (y)->imag * bl1_is_ar_ - (bl1_is_yr_) * bl1_is_ai_ ) / bl1_is_den_; \
}

// void bl1_zdinvscals( double* a, dcomplex* y );
#define bl1_zdinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

// void bl1_zinvscals( dcomplex* a, dcomplex* y );
#define bl1_zinvscals( a, y ) \
{ \
    double bl1_is_scale_ = bl1_fmaxabs( (a)->real, (a)->imag ); \
    double bl1_is_ar_    = (a)->real / bl1_is_scale_; \
    double bl1_is_ai_    = (a)->imag / bl1_is_scale_; \
    double bl1_is_yr_    = (y)->real; \
    double bl1_is_den_   = ( bl1_is_ar_ * (a)->real + bl1_is_ai_ * (a)->imag ); \
    (y)->real = ( (bl1_is_yr_) * bl1_is_ar_ + (y)->imag * bl1_is_ai_ ) / bl1_is_den_; \
    (y)->imag = ( (y)->imag * bl1_is_ar_ - (bl1_is_yr_) * bl1_is_ai_ ) / bl1_is_den_; \
}
274
// --- div3 ---

// Three-operand division: *a = *x / *y.
// The complex variants copy x into a and then divide a in place by y, so a
// must not alias y (y would be overwritten before it is read). Every
// parameter is parenthesized so that pointer expressions such as p + 1 can be
// passed as arguments.

// void bl1_sdiv3( float* x, float* y, float* a );
#define bl1_sdiv3( x, y, a ) \
*(a) = *(x) / *(y);

// void bl1_ddiv3( double* x, double* y, double* a );
#define bl1_ddiv3( x, y, a ) \
*(a) = *(x) / *(y);

// void bl1_cdiv3( scomplex* x, scomplex* y, scomplex* a );
// a = x / y;
#define bl1_cdiv3( x, y, a ) \
{ \
    *(a) = *(x); \
    bl1_cinvscals( (y), (a) ); \
}

// void bl1_zdiv3( dcomplex* x, dcomplex* y, dcomplex* a );
// a = x / y;
#define bl1_zdiv3( x, y, a ) \
{ \
    *(a) = *(x); \
    bl1_zinvscals( (y), (a) ); \
}
299
// --- add3 ---

// Three-operand addition: *a = *x + *y (component-wise for complex types).

// void bl1_sadd3( float* x, float* y, float* a );
#define bl1_sadd3( x, y, a )  *(a) = *(x) + *(y);

// void bl1_dadd3( double* x, double* y, double* a );
#define bl1_dadd3( x, y, a )  *(a) = *(x) + *(y);

// void bl1_cadd3( scomplex* x, scomplex* y, scomplex* a );
#define bl1_cadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; \
    (a)->imag = (x)->imag + (y)->imag; \
}

// void bl1_zadd3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bl1_zadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; \
    (a)->imag = (x)->imag + (y)->imag; \
}
323
// --- copys ---

// Scalar copy with optional conjugation: *y = *x, then, for the complex
// variants, the imaginary part of y is negated when bl1_is_conj( conj ) is
// true. The real variants ignore conj.
// The complex variants are wrapped in braces so that the copy and the
// conditional sign flip stay together under an unbraced if/for/while.

// void bl1_scopys( conj1_t conj, float* x, float* y );
#define bl1_scopys( conj, x, y ) \
*(y) = *(x);

// void bl1_dcopys( conj1_t conj, double* x, double* y );
#define bl1_dcopys( conj, x, y ) \
*(y) = *(x);

// void bl1_ccopys( conj1_t conj, scomplex* x, scomplex* y );
#define bl1_ccopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0F; \
}

// void bl1_zcopys( conj1_t conj, dcomplex* x, dcomplex* y );
#define bl1_zcopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0; \
}
343
// --- scals ---

// In-place scaling: *y = *a * *y.
// The complex-by-complex variants form both components of the product in
// temporaries before storing them, so y is fully read before it is written.

// void bl1_sscals( float* a, float* y );
#define bl1_sscals( a, y )  *(y) = *(a) * *(y);

// void bl1_dscals( double* a, double* y );
#define bl1_dscals( a, y )  *(y) = *(a) * *(y);

// void bl1_csscals( float* a, scomplex* y );
#define bl1_csscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; \
    (y)->imag = *(a) * (y)->imag; \
}

// void bl1_cscals( scomplex* a, scomplex* y );
#define bl1_cscals( a, y ) \
{ \
    float bl1_sc_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    float bl1_sc_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = bl1_sc_re_; \
    (y)->imag = bl1_sc_im_; \
}

// void bl1_zdscals( double* a, dcomplex* y );
#define bl1_zdscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; \
    (y)->imag = *(a) * (y)->imag; \
}

// void bl1_zscals( dcomplex* a, dcomplex* y );
#define bl1_zscals( a, y ) \
{ \
    double bl1_sc_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    double bl1_sc_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = bl1_sc_re_; \
    (y)->imag = bl1_sc_im_; \
}
385
// --- mult3 ---

// Three-operand product: *a = *x * *y.
// The complex variants compute both components into temporaries first, so
// the inputs are fully read before a is written.

// void bl1_smult3( float* x, float* y, float* a );
#define bl1_smult3( x, y, a )  *(a) = *(x) * *(y);

// void bl1_dmult3( double* x, double* y, double* a );
#define bl1_dmult3( x, y, a )  *(a) = *(x) * *(y);

// void bl1_cmult3( scomplex* x, scomplex* y, scomplex* a );
#define bl1_cmult3( x, y, a ) \
{ \
    float bl1_m3_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    float bl1_m3_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = bl1_m3_re_; \
    (a)->imag = bl1_m3_im_; \
}

// void bl1_zmult3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bl1_zmult3( x, y, a ) \
{ \
    double bl1_m3_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    double bl1_m3_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = bl1_m3_re_; \
    (a)->imag = bl1_m3_im_; \
}
413
// --- mult4 ---

// Fused scale-and-add: *y2 = *y1 + *alpha * *x.
// The complex variants compute both components into temporaries before
// storing, as the mult3/scals macros in this file do. This keeps the result
// correct when y2 aliases x or alpha, not only when it aliases y1.

// void bl1_smult4( float* alpha, float* x, float* y1, float* y2 );
#define bl1_smult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);

// void bl1_dmult4( double* alpha, double* x, double* y1, double* y2 );
#define bl1_dmult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);

// void bl1_cmult4( scomplex* alpha, scomplex* x, scomplex* y1, scomplex* y2 );
#define bl1_cmult4( alpha, x, y1, y2 ) \
{ \
    float bl1_m4_re_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    float bl1_m4_im_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
    (y2)->real = bl1_m4_re_; \
    (y2)->imag = bl1_m4_im_; \
}

// void bl1_zmult4( dcomplex* alpha, dcomplex* x, dcomplex* y1, dcomplex* y2 );
#define bl1_zmult4( alpha, x, y1, y2 ) \
{ \
    double bl1_m4_re_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    double bl1_m4_im_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
    (y2)->real = bl1_m4_re_; \
    (y2)->imag = bl1_m4_im_; \
}
437
// --- conjs ---

// In-place conjugation. For real types there is nothing to do, so the macro
// expands to an empty statement; for complex types the imaginary part is
// multiplied by -1.

// void bl1_sconjs( float* a );
#define bl1_sconjs( a )  ;

// void bl1_dconjs( double* a );
#define bl1_dconjs( a )  ;

// void bl1_cconjs( scomplex* a );
#define bl1_cconjs( a )  (a)->imag *= -1.0F;

// void bl1_zconjs( dcomplex* a );
#define bl1_zconjs( a )  (a)->imag *= -1.0;
455
// --- copyconj ---

// Conjugating copy: *y = conj( *x ). For real types this is a plain copy.
// The complex variants are wrapped in braces so that both component
// assignments stay together under an unbraced if/for/while.

// void bl1_scopyconj( float* x, float* y );
#define bl1_scopyconj( x, y ) \
*(y) = *(x);

// void bl1_dcopyconj( double* x, double* y );
#define bl1_dcopyconj( x, y ) \
*(y) = *(x);

// void bl1_ccopyconj( scomplex* x, scomplex* y );
#define bl1_ccopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

// void bl1_zcopyconj( dcomplex* x, dcomplex* y );
#define bl1_zcopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}
475
// --- eq1 ---

// Exact comparison against one. Each macro is an expression that evaluates to
// nonzero when the pointed-to scalar equals 1 (complex: real == 1 and
// imag == 0) and to zero otherwise. The pointer parameter is parenthesized so
// that expressions such as p + 1 dereference the intended element.

// int bl1_seq1( float* alpha );
#define bl1_seq1( alpha ) \
    ( *(alpha) == 1.0F )

// int bl1_deq1( double* alpha );
#define bl1_deq1( alpha ) \
    ( *(alpha) == 1.0 )

// int bl1_ceq1( scomplex* alpha );
#define bl1_ceq1( alpha ) \
    ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )

// int bl1_zeq1( dcomplex* alpha );
#define bl1_zeq1( alpha ) \
    ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )
493
494// --- Swapping/toggle macros --------------------------------------------------
495
// --- swap_pointers ---

// Exchange two pointer variables. Both arguments must be modifiable lvalues.
// The s/d variants use a typed temporary; the c/z variants go through void*.
// The temporary has a bl1_-prefixed name so that a caller variable called
// temp can be passed as an argument without being shadowed.

#define bl1_sswap_pointers( a, b ) \
{ \
    float* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

#define bl1_dswap_pointers( a, b ) \
{ \
    double* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

#define bl1_cswap_pointers( a, b ) \
{ \
    void* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}

#define bl1_zswap_pointers( a, b ) \
{ \
    void* bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}
525
// --- swap_ints ---

// Exchange two int variables; both arguments must be modifiable lvalues.
// The temporary has a bl1_-prefixed name so that a caller variable called
// temp can be passed as an argument without being shadowed.
#define bl1_swap_ints( a, b ) \
{ \
    int bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}
534
// --- swap_trans ---

// Exchange two trans1_t variables; both arguments must be modifiable lvalues.
// The temporary has a bl1_-prefixed name so that a caller variable called
// temp can be passed as an argument without being shadowed.
#define bl1_swap_trans( a, b ) \
{ \
    trans1_t bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}
543
// --- swap_conj ---

// Exchange two conj1_t variables; both arguments must be modifiable lvalues.
// The temporary has a bl1_-prefixed name so that a caller variable called
// temp can be passed as an argument without being shadowed.
#define bl1_swap_conj( a, b ) \
{ \
    conj1_t bl1_swap_tmp_ = (a); \
    (a) = (b); \
    (b) = bl1_swap_tmp_; \
}
552
// --- toggle_side ---

// Flips side in place: a value for which bl1_is_left() holds becomes
// BLIS1_RIGHT, anything else becomes BLIS1_LEFT. side is evaluated more than
// once and must be a modifiable lvalue.
#define bl1_toggle_side( side ) \
{ \
    (side) = ( bl1_is_left( side ) ? BLIS1_RIGHT : BLIS1_LEFT ); \
}
560
// --- toggle_uplo ---

// Flips uplo in place: a value for which bl1_is_lower() holds becomes
// BLIS1_UPPER_TRIANGULAR, anything else becomes BLIS1_LOWER_TRIANGULAR.
// uplo is evaluated more than once and must be a modifiable lvalue.
#define bl1_toggle_uplo( uplo ) \
{ \
    (uplo) = ( bl1_is_lower( uplo ) ? BLIS1_UPPER_TRIANGULAR \
                                    : BLIS1_LOWER_TRIANGULAR ); \
}
568
// --- toggle_trans ---

// Flips the transposition of trans in place while keeping its conjugation:
//   no-trans      -> BLIS1_TRANSPOSE
//   trans         -> BLIS1_NO_TRANSPOSE
//   conj-no-trans -> BLIS1_CONJ_TRANSPOSE
//   anything else -> BLIS1_CONJ_NO_TRANSPOSE
// The predicates are tested in that order. trans is evaluated more than once
// and must be a modifiable lvalue.
#define bl1_toggle_trans( trans ) \
{ \
    if      ( bl1_is_notrans( trans ) )     { (trans) = BLIS1_TRANSPOSE; } \
    else if ( bl1_is_trans( trans ) )       { (trans) = BLIS1_NO_TRANSPOSE; } \
    else if ( bl1_is_conjnotrans( trans ) ) { (trans) = BLIS1_CONJ_TRANSPOSE; } \
    else                                    { (trans) = BLIS1_CONJ_NO_TRANSPOSE; } \
}
577
// --- toggle_conjtrans ---

// Switches trans in place between no-transpose and conjugate-transpose: a
// value for which bl1_is_notrans() holds becomes BLIS1_CONJ_TRANSPOSE, every
// other value becomes BLIS1_NO_TRANSPOSE. trans is evaluated more than once
// and must be a modifiable lvalue.
#define bl1_toggle_conjtrans( trans ) \
{ \
    (trans) = ( bl1_is_notrans( trans ) ? BLIS1_CONJ_TRANSPOSE \
                                        : BLIS1_NO_TRANSPOSE ); \
}
584
// --- toggle_conj ---

// Flips conj in place: a value for which bl1_is_conj() holds becomes
// BLIS1_NO_CONJUGATE, anything else becomes BLIS1_CONJUGATE. conj is
// evaluated more than once and must be a modifiable lvalue.
#define bl1_toggle_conj( conj ) \
{ \
    (conj) = ( bl1_is_conj( conj ) ? BLIS1_NO_CONJUGATE : BLIS1_CONJUGATE ); \
}
592
593#endif // #ifndef BLIS1_MACRO_DEFS_H