Go to the documentation of this file.
11#ifndef BLIS1_MACRO_DEFS_H
12#define BLIS1_MACRO_DEFS_H
/*
 * Integer identifiers for intrinsics variants.
 * NOTE(review): presumably these select which vector-intrinsics code path is
 * compiled; only the values 0 and 3 are visible in this listing, so any
 * identifiers in between must be confirmed against the original header.
 * The stray listing line numbers that were fused onto the directives have
 * been removed so that these are valid preprocessor lines.
 */
#define BLIS1_NO_INTRINSICS  0
#define BLIS1_SSE_INTRINSICS 3
/*
 * bl1_min( a, b ): yields a when a < b, otherwise b (so b is the result
 * when the operands compare equal or are unordered).
 * The selected operand is evaluated a second time; do not pass expressions
 * with side effects.
 */
#define bl1_min( a, b ) ( (a) < (b) ? (a) : (b) )
/*
 * bl1_max( a, b ): yields a when a > b, otherwise b (so b is the result
 * when the operands compare equal or are unordered).
 * The selected operand is evaluated a second time; do not pass expressions
 * with side effects.
 */
#define bl1_max( a, b ) ( (a) > (b) ? (a) : (b) )
/*
 * bl1_abs( a ): yields -(a) when a <= 0, otherwise a.
 * Zero takes the negation branch. The argument is evaluated twice; do not
 * pass expressions with side effects.
 */
#define bl1_abs( a ) ( (a) <= 0 ? -(a) : (a) )
/*
 * bl1_fmin / bl1_fmax: aliases that expand directly to bl1_min / bl1_max,
 * inheriting their behavior (including repeated evaluation of arguments).
 */
#define bl1_fmin( a, b ) bl1_min( a, b )
#define bl1_fmax( a, b ) bl1_max( a, b )
/*
 * bl1_fabs( a ): yields -(a) when a <= 0.0, otherwise a.
 * Zero takes the negation branch, so an input of +0.0 produces -0.0.
 * The argument is evaluated twice; do not pass expressions with side
 * effects.
 */
#define bl1_fabs( a ) ( (a) <= 0.0 ? -(a) : (a) )
/*
 * bl1_fminabs( a, b ) / bl1_fmaxabs( a, b ): the visible text opens a call
 * to bl1_fmin / bl1_fmax whose first argument is bl1_fabs( a ).
 * NOTE(review): the line carrying the second argument is absent from this
 * listing (the embedded numbering skips it); presumably it is
 * bl1_fabs( b ) -- confirm against the original header.
 */
74#define bl1_fminabs( a, b ) \
76 bl1_fmin( bl1_fabs( a ), \
79#define bl1_fmaxabs( a, b ) \
81 bl1_fmax( bl1_fabs( a ), \
/*
 * neg1 family (one pointer argument x) for the s/d/c/z variants, followed
 * by the real-valued neg2 variants (arguments x and y).
 * NOTE(review): only the #define lines survive in this listing; every body
 * line is missing, so the exact operation must be confirmed against the
 * original header (presumably in-place negation for neg1 and negate-and-
 * store for neg2, by analogy with the visible bl1_cneg2 / bl1_zneg2).
 */
89#define bl1_sneg1( x ) \
93#define bl1_dneg1( x ) \
97#define bl1_cneg1( x ) \
102#define bl1_zneg1( x ) \
109#define bl1_sneg2( x, y ) \
113#define bl1_dneg2( x, y ) \
/*
 * bl1_cneg2( x, y ): store the negation of the complex value *x into *y.
 * Both operands are pointers to objects with `real` and `imag` members;
 * each part is multiplied by -1.0F.
 *
 * The two assignments are joined with the comma operator so that the
 * expansion is one statement: `if ( c ) bl1_cneg2( x, y );` now guards both
 * parts instead of only the real part. The trailing semicolon is kept so
 * that existing call sites written with or without their own semicolon
 * continue to compile.
 */
#define bl1_cneg2( x, y ) \
( (y)->real = -1.0F * (x)->real, \
  (y)->imag = -1.0F * (x)->imag );
/*
 * bl1_zneg2( x, y ): store the negation of the complex value *x into *y.
 * Both operands are pointers to objects with `real` and `imag` members;
 * each part is multiplied by -1.0.
 *
 * The two assignments are joined with the comma operator so that the
 * expansion is one statement: `if ( c ) bl1_zneg2( x, y );` now guards both
 * parts instead of only the real part. The trailing semicolon is kept so
 * that existing call sites written with or without their own semicolon
 * continue to compile.
 */
#define bl1_zneg2( x, y ) \
( (y)->real = -1.0 * (x)->real, \
  (y)->imag = -1.0 * (x)->imag );
/*
 * bl1_ssqrte( alpha, error ): in-place square root of the float at *alpha
 * with a status code written to *error.
 *   - *alpha <= 0 or NaN: *alpha is left unchanged, *error = FLA_FAILURE.
 *   - otherwise:          *alpha = sqrt( *alpha ), *error = FLA_SUCCESS.
 * Note that an input of exactly zero is reported as a failure.
 *
 * The expansion is a bare if/else statement and dereferences `alpha`
 * several times. isnan()/sqrt() (<math.h>) and the FLA_* status codes must
 * be visible at the point of use.
 */
#define bl1_ssqrte( alpha, error ) \
if ( *(alpha) <= 0.0F || isnan( *(alpha) ) ) { *(error) = FLA_FAILURE; } \
else { *(alpha) = ( float ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; }
/*
 * bl1_dsqrte( alpha, error ): in-place square root of the double at *alpha
 * with a status code written to *error.
 *   - *alpha <= 0 or NaN: *alpha is left unchanged, *error = FLA_FAILURE.
 *   - otherwise:          *alpha = sqrt( *alpha ), *error = FLA_SUCCESS.
 * Note that an input of exactly zero is reported as a failure.
 *
 * The expansion is a bare if/else statement and dereferences `alpha`
 * several times. isnan()/sqrt() (<math.h>) and the FLA_* status codes must
 * be visible at the point of use.
 */
#define bl1_dsqrte( alpha, error ) \
if ( *(alpha) <= 0.0 || isnan( *(alpha) ) ) { *(error) = FLA_FAILURE; } \
else { *(alpha) = ( double ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; }
/*
 * bl1_csqrte( alpha, error ): square root driven by the real part of the
 * complex value at *alpha, with a status code written to *error.
 * Visible behavior: when (alpha)->real is <= 0 or NaN, *error is set to
 * FLA_FAILURE; the remaining lines replace the real part with its square
 * root, zero the imaginary part and set *error to FLA_SUCCESS.
 * NOTE(review): the line between the two branches is missing from this
 * listing (presumably `else { \`) -- confirm against the original header.
 */
139#define bl1_csqrte( alpha, error ) \
140if ( (alpha)->real <= 0.0F || isnan( (alpha)->real) ) \
141{ *(error) = FLA_FAILURE; } \
143(alpha)->real = ( float ) sqrt( (alpha)->real ); \
144(alpha)->imag = 0.0F; *(error) = FLA_SUCCESS; }
/*
 * bl1_zsqrte( alpha, error ): square root driven by the real part of the
 * complex value at *alpha, with a status code written to *error.
 * Visible behavior: when (alpha)->real is <= 0 or NaN, *error is set to
 * FLA_FAILURE; the remaining lines replace the real part with its square
 * root, zero the imaginary part and set *error to FLA_SUCCESS.
 * NOTE(review): the line between the two branches is missing from this
 * listing (presumably `else { \`) -- confirm against the original header.
 */
147#define bl1_zsqrte( alpha, error ) \
148if ( (alpha)->real <= 0.0 || isnan( (alpha)->real) ) \
149{ *(error) = FLA_FAILURE; } \
151(alpha)->real = ( double ) sqrt( (alpha)->real ); \
152(alpha)->imag = 0.0; *(error) = FLA_SUCCESS; }
/*
 * bl1_sabsval2( alpha, absval ): write the absolute value of the float at
 * *alpha into *absval. The value is widened to double for fabs() and the
 * result is narrowed back to float.
 * The expansion already ends in a semicolon. Requires fabs() from <math.h>.
 */
#define bl1_sabsval2( alpha, absval ) \
*(absval) = ( float ) fabs( ( double ) *(alpha) );
/*
 * bl1_dabsval2( alpha, absval ): write fabs( *alpha ) into *absval.
 * The expansion already ends in a semicolon. Requires fabs() from <math.h>.
 */
#define bl1_dabsval2( alpha, absval ) \
*(absval) = fabs( *(alpha) );
/*
 * Complex absolute-value macros: bl1_cabsval2, bl1_csabsval2 (float
 * temporaries, sqrtf) and bl1_zabsval2, bl1_zdabsval2 (double temporaries,
 * sqrt). All take x, a pointer to an object with `real`/`imag` members,
 * and a destination a.
 *
 * Visible computation, identical in all four:
 *   s   = bl1_fmaxabs( real, imag )
 *   mag = sqrt( s ) * sqrt( ( real / s ) * real + ( imag / s ) * imag )
 * i.e. the magnitude of *x with each squared term pre-divided by s.
 * When both parts are zero, s is zero and the divisions are 0/0.
 *
 * NOTE(review): the opening/closing brace lines and the statement(s) that
 * store `mag` through `a` are missing from this listing; how the result is
 * written (and how the c/cs and z/zd variants differ) must be confirmed
 * against the original header.
 */
165#define bl1_cabsval2( x, a ) \
167 float s = bl1_fmaxabs( (x)->real, (x)->imag ); \
168 float mag = sqrtf( s ) * \
169 sqrtf( ( (x)->real / s ) * (x)->real + \
170 ( (x)->imag / s ) * (x)->imag ); \
176#define bl1_csabsval2( x, a ) \
178 float s = bl1_fmaxabs( (x)->real, (x)->imag ); \
179 float mag = sqrtf( s ) * \
180 sqrtf( ( (x)->real / s ) * (x)->real + \
181 ( (x)->imag / s ) * (x)->imag ); \
186#define bl1_zabsval2( x, a ) \
188 double s = bl1_fmaxabs( (x)->real, (x)->imag ); \
189 double mag = sqrt( s ) * \
190 sqrt( ( (x)->real / s ) * (x)->real + \
191 ( (x)->imag / s ) * (x)->imag ); \
197#define bl1_zdabsval2( x, a ) \
199 double s = bl1_fmaxabs( (x)->real, (x)->imag ); \
200 double mag = sqrt( s ) * \
201 sqrt( ( (x)->real / s ) * (x)->real + \
202 ( (x)->imag / s ) * (x)->imag ); \
/*
 * bl1_sabsqr( alpha ): replace the value at *alpha with its square
 * (*alpha * *alpha), in place.
 * The expansion already ends in a semicolon and dereferences `alpha` three
 * times.
 */
#define bl1_sabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);
/*
 * bl1_dabsqr( alpha ): replace the value at *alpha with its square
 * (*alpha * *alpha), in place.
 * The expansion already ends in a semicolon and dereferences `alpha` three
 * times.
 */
#define bl1_dabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);
/*
 * bl1_cabsqr / bl1_zabsqr( alpha ): the visible statement overwrites
 * (alpha)->real with real*real + imag*imag, i.e. the squared magnitude of
 * the complex value at *alpha.
 * NOTE(review): each macro ends on a line continuation whose following
 * line is missing from this listing (presumably the imaginary part is
 * reset there) -- confirm against the original header.
 */
218#define bl1_cabsqr( alpha ) \
219(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
223#define bl1_zabsqr( alpha ) \
224(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
/*
 * invscals family: inverse scaling of *y by *a.
 *
 *  - bl1_sinvscals / bl1_dinvscals: bodies are missing from this listing.
 *  - bl1_csinvscals / bl1_zdinvscals: divide both (y)->real and (y)->imag
 *    by the scalar *(a).
 *  - bl1_cinvscals / bl1_zinvscals: complex division y := y / a.
 *      s     = bl1_fmaxabs( a.real, a.imag )
 *      ar_s  = a.real / s,  ai_s = a.imag / s
 *      temp  = ar_s * a.real + ai_s * a.imag
 *      y.real = ( y.real * ar_s + y.imag * ai_s ) / temp
 *      y.imag = ( y.imag * ar_s - y.real * ai_s ) / temp
 *    `yrt` holds the original y.real because y.real is overwritten before
 *    y.imag is computed. No guard against a == 0 is visible (s would be
 *    zero and the divisions undefined/NaN).
 *
 * NOTE(review): the brace lines that scope the temporaries are missing from
 * this listing; confirm the exact statement wrapping against the original
 * header.
 */
230#define bl1_sinvscals( a, y ) \
234#define bl1_dinvscals( a, y ) \
238#define bl1_csinvscals( a, y ) \
240(y)->real = (y)->real / *(a); \
241(y)->imag = (y)->imag / *(a); \
245#define bl1_cinvscals( a, y ) \
247 float s = bl1_fmaxabs( (a)->real, (a)->imag ); \
248 float ar_s = (a)->real / s; \
249 float ai_s = (a)->imag / s; \
250 float yrt = (y)->real; \
251 float temp = ( ar_s * (a)->real + ai_s * (a)->imag ); \
252 (y)->real = ( (yrt) * ar_s + (y)->imag * ai_s ) / temp; \
253 (y)->imag = ( (y)->imag * ar_s - (yrt) * ai_s ) / temp; \
257#define bl1_zdinvscals( a, y ) \
259(y)->real = (y)->real / *(a); \
260(y)->imag = (y)->imag / *(a); \
264#define bl1_zinvscals( a, y ) \
266 double s = bl1_fmaxabs( (a)->real, (a)->imag ); \
267 double ar_s = (a)->real / s; \
268 double ai_s = (a)->imag / s; \
269 double yrt = (y)->real; \
270 double temp = ( ar_s * (a)->real + ai_s * (a)->imag ); \
271 (y)->real = ( (yrt) * ar_s + (y)->imag * ai_s ) / temp; \
272 (y)->imag = ( (y)->imag * ar_s - (yrt) * ai_s ) / temp; \
/*
 * div3 family ( x, y, a ).
 * Visible behavior: the complex variants invoke bl1_cinvscals( y, a ) /
 * bl1_zinvscals( y, a ), i.e. *a is divided by *y in place.
 * NOTE(review): the real-valued bodies and the lines preceding the
 * invscals calls are missing from this listing (presumably *a is first
 * loaded from *x, giving a := x / y) -- confirm against the original
 * header.
 */
278#define bl1_sdiv3( x, y, a ) \
282#define bl1_ddiv3( x, y, a ) \
287#define bl1_cdiv3( x, y, a ) \
290 bl1_cinvscals( y, a ); \
294#define bl1_zdiv3( x, y, a ) \
297 bl1_zinvscals( y, a ); \
/*
 * add3 family ( x, y, a ).
 * Visible behavior: the complex variants store the component-wise sum of
 * *x and *y into *a (real parts added, imaginary parts added).
 * NOTE(review): the real-valued bodies and the brace lines of the complex
 * variants are missing from this listing -- confirm against the original
 * header.
 */
303#define bl1_sadd3( x, y, a ) \
307#define bl1_dadd3( x, y, a ) \
311#define bl1_cadd3( x, y, a ) \
313(a)->real = (x)->real + (y)->real; \
314(a)->imag = (x)->imag + (y)->imag; \
318#define bl1_zadd3( x, y, a ) \
320(a)->real = (x)->real + (y)->real; \
321(a)->imag = (x)->imag + (y)->imag; \
/*
 * copys family ( conj, x, y ).
 * Visible behavior: in the complex variants, when bl1_is_conj( conj ) is
 * true the imaginary part of *y is multiplied by -1.
 * NOTE(review): the real-valued bodies and the line preceding the
 * conjugation test are missing from this listing (presumably the copy of
 * *x into *y) -- confirm against the original header.
 */
327#define bl1_scopys( conj, x, y ) \
331#define bl1_dcopys( conj, x, y ) \
335#define bl1_ccopys( conj, x, y ) \
337if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0F;
340#define bl1_zcopys( conj, x, y ) \
342if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0;
/*
 * scals family ( a, y ): scaling of *y by *a.
 *
 *  - bl1_sscals / bl1_dscals: bodies are missing from this listing.
 *  - bl1_csscals / bl1_zdscals: multiply both (y)->real and (y)->imag by
 *    the scalar *(a).
 *  - bl1_cscals / bl1_zscals: compute the complex product a * y into the
 *    temporaries tempr (real part) and tempi (imaginary part).
 *
 * NOTE(review): the brace lines and, for the complex-by-complex variants,
 * the statements that store tempr/tempi are missing from this listing
 * (presumably written back into *y) -- confirm against the original header.
 */
347#define bl1_sscals( a, y ) \
351#define bl1_dscals( a, y ) \
355#define bl1_csscals( a, y ) \
357(y)->real = *(a) * (y)->real; \
358(y)->imag = *(a) * (y)->imag; \
362#define bl1_cscals( a, y ) \
364float tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
365float tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
371#define bl1_zdscals( a, y ) \
373(y)->real = *(a) * (y)->real; \
374(y)->imag = *(a) * (y)->imag; \
378#define bl1_zscals( a, y ) \
380double tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
381double tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
/*
 * mult3 family ( x, y, a ).
 * Visible behavior: the complex variants compute the complex product
 * x * y into the temporaries tempr (real part) and tempi (imaginary part).
 * NOTE(review): the real-valued bodies, the brace lines and the statements
 * that store tempr/tempi are missing from this listing (presumably written
 * into *a) -- confirm against the original header.
 */
389#define bl1_smult3( x, y, a ) \
393#define bl1_dmult3( x, y, a ) \
397#define bl1_cmult3( x, y, a ) \
399float tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
400float tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
406#define bl1_zmult3( x, y, a ) \
408double tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
409double tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
/*
 * bl1_smult4( alpha, x, y1, y2 ): multiply-accumulate on pointed-to
 * scalars, *y2 = *y1 + *alpha * *x.
 * The expansion already ends in a semicolon.
 */
#define bl1_smult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);
/*
 * bl1_dmult4( alpha, x, y1, y2 ): multiply-accumulate on pointed-to
 * scalars, *y2 = *y1 + *alpha * *x.
 * The expansion already ends in a semicolon.
 */
#define bl1_dmult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);
/*
 * bl1_cmult4 / bl1_zmult4( alpha, x, y1, y2 ): complex multiply-accumulate,
 * y2 = y1 + alpha * x, written out per component:
 *   y2.real = y1.real + alpha.real * x.real - alpha.imag * x.imag
 *   y2.imag = y1.imag + alpha.imag * x.real + alpha.real * x.imag
 * y2.real is stored before y2.imag is computed, so the result is affected
 * if y2 aliases alpha or x.
 * NOTE(review): the brace lines around the two statements are missing from
 * this listing -- confirm against the original header.
 */
425#define bl1_cmult4( alpha, x, y1, y2 ) \
427(y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
428(y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
432#define bl1_zmult4( alpha, x, y1, y2 ) \
434(y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
435(y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
/*
 * conjs family ( a ) for the s/d/c/z variants, followed by the real-valued
 * copyconj variants ( x, y ).
 * NOTE(review): only the #define lines survive in this listing; every body
 * line is missing, so the exact operations must be confirmed against the
 * original header.
 */
441#define bl1_sconjs( a ) \
445#define bl1_dconjs( a ) \
449#define bl1_cconjs( a ) \
453#define bl1_zconjs( a ) \
459#define bl1_scopyconj( x, y ) \
463#define bl1_dcopyconj( x, y ) \
/*
 * bl1_ccopyconj( x, y ): store the complex conjugate of *x into *y.
 * The real part is copied and the imaginary part is multiplied by -1.0F.
 *
 * The two assignments are joined with the comma operator so that the
 * expansion is one statement: `if ( c ) bl1_ccopyconj( x, y );` now guards
 * both parts instead of only the real part. The trailing semicolon is kept
 * so that existing call sites written with or without their own semicolon
 * continue to compile.
 */
#define bl1_ccopyconj( x, y ) \
( (y)->real = (x)->real, \
  (y)->imag = -1.0F * (x)->imag );
/*
 * bl1_zcopyconj( x, y ): store the complex conjugate of *x into *y.
 * The real part is copied and the imaginary part is multiplied by -1.0.
 *
 * The two assignments are joined with the comma operator so that the
 * expansion is one statement: `if ( c ) bl1_zcopyconj( x, y );` now guards
 * both parts instead of only the real part. The trailing semicolon is kept
 * so that existing call sites written with or without their own semicolon
 * continue to compile.
 */
#define bl1_zcopyconj( x, y ) \
( (y)->real = (x)->real, \
  (y)->imag = -1.0 * (x)->imag );
/*
 * bl1_seq1 / bl1_deq1( alpha ).
 * NOTE(review): the body lines are missing from this listing; presumably
 * they test whether *alpha equals one, by analogy with the visible
 * bl1_ceq1 / bl1_zeq1 -- confirm against the original header.
 */
479#define bl1_seq1( alpha ) \
483#define bl1_deq1( alpha ) \
/*
 * bl1_ceq1( alpha ): expression that is non-zero exactly when the complex
 * value at *alpha has real part 1.0F and imaginary part 0.0F (exact
 * floating-point comparison, no tolerance).
 */
#define bl1_ceq1( alpha ) \
  ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )
/*
 * bl1_zeq1( alpha ): expression that is non-zero exactly when the complex
 * value at *alpha has real part 1.0 and imaginary part 0.0 (exact
 * floating-point comparison, no tolerance).
 */
#define bl1_zeq1( alpha ) \
  ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )
/*
 * Swap helpers ( a, b ): pointer swaps for the s/d/c/z types, plus swaps of
 * ints, trans values and conj values.
 * Visible behavior: bl1_swap_trans begins by saving (a) in a local
 * `trans1_t temp`.
 * NOTE(review): every other body line is missing from this listing;
 * presumably each macro exchanges a and b through a temporary -- confirm
 * against the original header.
 */
498#define bl1_sswap_pointers( a, b ) \
505#define bl1_dswap_pointers( a, b ) \
512#define bl1_cswap_pointers( a, b ) \
519#define bl1_zswap_pointers( a, b ) \
528#define bl1_swap_ints( a, b ) \
537#define bl1_swap_trans( a, b ) \
539trans1_t temp = (a); \
546#define bl1_swap_conj( a, b ) \
/*
 * Toggle helpers. Each macro assigns to its own argument, so the argument
 * must be a modifiable lvalue; it is used unparenthesized.
 *
 *  - bl1_toggle_side( side ):  left  -> BLIS1_RIGHT, anything else ->
 *    BLIS1_LEFT.
 *  - bl1_toggle_uplo( uplo ):  lower -> BLIS1_UPPER_TRIANGULAR, anything
 *    else -> BLIS1_LOWER_TRIANGULAR.
 *  - bl1_toggle_trans( trans ): flips the transposition while keeping the
 *    conjugation: no-trans <-> trans, conj-no-trans <-> conj-trans (the
 *    final else covers every value not matched by the first three tests).
 *  - bl1_toggle_conjtrans( trans ): no-trans -> BLIS1_CONJ_TRANSPOSE,
 *    anything else -> BLIS1_NO_TRANSPOSE.
 *
 * NOTE(review): the brace lines around each if/else chain are missing from
 * this listing -- confirm the statement wrapping against the original
 * header.
 */
555#define bl1_toggle_side( side ) \
557if ( bl1_is_left( side ) ) side = BLIS1_RIGHT; \
558else side = BLIS1_LEFT; \
563#define bl1_toggle_uplo( uplo ) \
565if ( bl1_is_lower( uplo ) ) uplo = BLIS1_UPPER_TRIANGULAR; \
566else uplo = BLIS1_LOWER_TRIANGULAR; \
570#define bl1_toggle_trans( trans ) \
572if ( bl1_is_notrans( trans ) ) trans = BLIS1_TRANSPOSE; \
573else if ( bl1_is_trans( trans ) ) trans = BLIS1_NO_TRANSPOSE; \
574else if ( bl1_is_conjnotrans( trans ) ) trans = BLIS1_CONJ_TRANSPOSE; \
575else trans = BLIS1_CONJ_NO_TRANSPOSE; \
579#define bl1_toggle_conjtrans( trans ) \
581if ( bl1_is_notrans( trans ) ) trans = BLIS1_CONJ_TRANSPOSE; \
582else trans = BLIS1_NO_TRANSPOSE; \
587#define bl1_toggle_conj( conj ) \
589if ( bl1_is_conj( conj ) ) conj = BLIS1_NO_CONJUGATE; \
590else conj = BLIS1_CONJUGATE; \