libflame revision_anchor
FLA_Apply_G_mx3b_opt.h
Go to the documentation of this file.
/*

    Copyright (C) 2014, The University of Texas at Austin

    This file is part of libflame and is available under the 3-Clause
    BSD license, which can be found in the LICENSE file at the top-level
    directory, or at http://opensource.org/licenses/BSD-3-Clause

*/
10
/*
   MAC_Apply_G_mx3b_ops

   Applies two plane rotations (Givens rotations, going by the macro name)
   in place to three strided vectors of single-precision real elements.

   For each of the m_A element positions, the ( gamma23, sigma23 ) rotation
   is applied to the ( a2, a3 ) pair first:

     a2_new = a2 * gamma23 + a3 * sigma23
     a3_new = a3 * gamma23 - a2 * sigma23

   and the ( gamma12, sigma12 ) rotation is then applied to ( a1, a2 ),
   reading back the a2 value stored by the first step:

     a1_new = a1     * gamma12 + a2_new * sigma12
     a2_fin = a2_new * gamma12 - a1     * sigma12

   Parameters:
     m_A                number of elements processed in each vector
     gamma12, sigma12   pointers to the coefficients of the ( a1, a2 ) rotation
     gamma23, sigma23   pointers to the coefficients of the ( a2, a3 ) rotation
     a1, a2, a3         pointers to the first element of each vector
     inc_a1 .. inc_a3   element stride used to advance each vector pointer

   Notes:
     - The four coefficients are dereferenced once, before the loop.
     - The vector pointers are copied into restrict-qualified locals, so the
       three vectors must not overlap.
     - m_A and the increments are re-evaluated on every iteration; pass
       expressions without side effects.
     - Every argument is parenthesized in the expansion, and the locals carry
       an mx3b_ prefix so they cannot capture identifiers that appear in the
       caller's argument expressions.
     - The macro expands to a brace-enclosed block, not an expression.
*/
#define MAC_Apply_G_mx3b_ops( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
  float           mx3b_g12 = *( gamma12 ); \
  float           mx3b_s12 = *( sigma12 ); \
  float           mx3b_g23 = *( gamma23 ); \
  float           mx3b_s23 = *( sigma23 ); \
  float* restrict mx3b_p1  = ( a1 ); \
  float* restrict mx3b_p2  = ( a2 ); \
  float* restrict mx3b_p3  = ( a3 ); \
  float           mx3b_t1; \
  float           mx3b_t2; \
  float           mx3b_t3; \
  int             mx3b_i; \
\
  for ( mx3b_i = 0; mx3b_i < ( m_A ); ++mx3b_i ) \
  { \
    /* Rotate the ( a2, a3 ) pair with ( gamma23, sigma23 ). */ \
    mx3b_t2 = *mx3b_p2; \
    mx3b_t3 = *mx3b_p3; \
\
    *mx3b_p2 = mx3b_t2 * mx3b_g23 + mx3b_t3 * mx3b_s23; \
    *mx3b_p3 = mx3b_t3 * mx3b_g23 - mx3b_t2 * mx3b_s23; \
\
    /* Rotate the ( a1, a2 ) pair with ( gamma12, sigma12 ), */ \
    /* picking up the a2 value that was just stored.         */ \
    mx3b_t1 = *mx3b_p1; \
    mx3b_t2 = *mx3b_p2; \
\
    *mx3b_p1 = mx3b_t1 * mx3b_g12 + mx3b_t2 * mx3b_s12; \
    *mx3b_p2 = mx3b_t2 * mx3b_g12 - mx3b_t1 * mx3b_s12; \
\
    mx3b_p1 += ( inc_a1 ); \
    mx3b_p2 += ( inc_a2 ); \
    mx3b_p3 += ( inc_a3 ); \
  } \
}
51
/*
   MAC_Apply_G_mx3b_opc

   Applies two plane rotations (Givens rotations, going by the macro name)
   with real single-precision coefficients, in place, to three strided
   vectors of scomplex elements. The real and imaginary parts of each
   element are rotated independently with the same coefficients.

   For each of the m_A element positions, the ( gamma23, sigma23 ) rotation
   is applied to the ( a2, a3 ) pair first:

     a2_new =  gamma23 * a2 + sigma23 * a3
     a3_new = -sigma23 * a2 + gamma23 * a3

   and the ( gamma12, sigma12 ) rotation is then applied to ( a1, a2 ),
   reading back the a2 value stored by the first step:

     a1_new =  gamma12 * a1 + sigma12 * a2_new
     a2_fin = -sigma12 * a1 + gamma12 * a2_new

   Parameters:
     m_A                number of elements processed in each vector
     gamma12, sigma12   pointers to the coefficients of the ( a1, a2 ) rotation
     gamma23, sigma23   pointers to the coefficients of the ( a2, a3 ) rotation
     a1, a2, a3         pointers to the first scomplex element of each vector
     inc_a1 .. inc_a3   element stride used to advance each vector pointer

   Notes:
     - The four coefficients are dereferenced once, before the loop, and
       stored as float.
     - The vector pointers are copied into restrict-qualified locals, so the
       three vectors must not overlap.
     - m_A and the increments are re-evaluated on every iteration; pass
       expressions without side effects.
     - Every argument is parenthesized in the expansion, and the locals carry
       an mx3b_ prefix so they cannot capture identifiers that appear in the
       caller's argument expressions.
     - The macro expands to a brace-enclosed block, not an expression.
*/
#define MAC_Apply_G_mx3b_opc( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
  float              mx3b_g12 = *( gamma12 ); \
  float              mx3b_s12 = *( sigma12 ); \
  float              mx3b_g23 = *( gamma23 ); \
  float              mx3b_s23 = *( sigma23 ); \
  scomplex* restrict mx3b_p1  = ( a1 ); \
  scomplex* restrict mx3b_p2  = ( a2 ); \
  scomplex* restrict mx3b_p3  = ( a3 ); \
  scomplex           mx3b_t1; \
  scomplex           mx3b_t2; \
  scomplex           mx3b_t3; \
  int                mx3b_i; \
\
  for ( mx3b_i = 0; mx3b_i < ( m_A ); ++mx3b_i ) \
  { \
    /* Rotate the ( a2, a3 ) pair with ( gamma23, sigma23 ). */ \
    mx3b_t2 = *mx3b_p2; \
    mx3b_t3 = *mx3b_p3; \
\
    mx3b_p2->real =  mx3b_g23 * mx3b_t2.real + mx3b_s23 * mx3b_t3.real; \
    mx3b_p2->imag =  mx3b_g23 * mx3b_t2.imag + mx3b_s23 * mx3b_t3.imag; \
\
    mx3b_p3->real = -mx3b_s23 * mx3b_t2.real + mx3b_g23 * mx3b_t3.real; \
    mx3b_p3->imag = -mx3b_s23 * mx3b_t2.imag + mx3b_g23 * mx3b_t3.imag; \
\
    /* Rotate the ( a1, a2 ) pair with ( gamma12, sigma12 ), */ \
    /* picking up the a2 value that was just stored.         */ \
    mx3b_t1 = *mx3b_p1; \
    mx3b_t2 = *mx3b_p2; \
\
    mx3b_p1->real =  mx3b_g12 * mx3b_t1.real + mx3b_s12 * mx3b_t2.real; \
    mx3b_p1->imag =  mx3b_g12 * mx3b_t1.imag + mx3b_s12 * mx3b_t2.imag; \
\
    mx3b_p2->real = -mx3b_s12 * mx3b_t1.real + mx3b_g12 * mx3b_t2.real; \
    mx3b_p2->imag = -mx3b_s12 * mx3b_t1.imag + mx3b_g12 * mx3b_t2.imag; \
\
    mx3b_p1 += ( inc_a1 ); \
    mx3b_p2 += ( inc_a2 ); \
    mx3b_p3 += ( inc_a3 ); \
  } \
}
98
/*
   MAC_Apply_G_mx3b_opd

   Applies two plane rotations (Givens rotations, going by the macro name)
   in place to three strided vectors of double-precision real elements.

   For each of the m_A element positions, the ( gamma23, sigma23 ) rotation
   is applied to the ( a2, a3 ) pair first:

     a2_new = a2 * gamma23 + a3 * sigma23
     a3_new = a3 * gamma23 - a2 * sigma23

   and the ( gamma12, sigma12 ) rotation is then applied to ( a1, a2 ),
   reading back the a2 value stored by the first step:

     a1_new = a1     * gamma12 + a2_new * sigma12
     a2_fin = a2_new * gamma12 - a1     * sigma12

   Parameters:
     m_A                number of elements processed in each vector
     gamma12, sigma12   pointers to the coefficients of the ( a1, a2 ) rotation
     gamma23, sigma23   pointers to the coefficients of the ( a2, a3 ) rotation
     a1, a2, a3         pointers to the first element of each vector
     inc_a1 .. inc_a3   element stride used to advance each vector pointer

   Notes:
     - The four coefficients are dereferenced once, before the loop.
     - The vector pointers are copied into restrict-qualified locals, so the
       three vectors must not overlap.
     - m_A and the increments are re-evaluated on every iteration; pass
       expressions without side effects.
     - Every argument is parenthesized in the expansion, and the locals carry
       an mx3b_ prefix so they cannot capture identifiers that appear in the
       caller's argument expressions.
     - The macro expands to a brace-enclosed block, not an expression.
*/
#define MAC_Apply_G_mx3b_opd( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
  double           mx3b_g12 = *( gamma12 ); \
  double           mx3b_s12 = *( sigma12 ); \
  double           mx3b_g23 = *( gamma23 ); \
  double           mx3b_s23 = *( sigma23 ); \
  double* restrict mx3b_p1  = ( a1 ); \
  double* restrict mx3b_p2  = ( a2 ); \
  double* restrict mx3b_p3  = ( a3 ); \
  double           mx3b_t1; \
  double           mx3b_t2; \
  double           mx3b_t3; \
  int              mx3b_i; \
\
  for ( mx3b_i = 0; mx3b_i < ( m_A ); ++mx3b_i ) \
  { \
    /* Rotate the ( a2, a3 ) pair with ( gamma23, sigma23 ). */ \
    mx3b_t2 = *mx3b_p2; \
    mx3b_t3 = *mx3b_p3; \
\
    *mx3b_p2 = mx3b_t2 * mx3b_g23 + mx3b_t3 * mx3b_s23; \
    *mx3b_p3 = mx3b_t3 * mx3b_g23 - mx3b_t2 * mx3b_s23; \
\
    /* Rotate the ( a1, a2 ) pair with ( gamma12, sigma12 ), */ \
    /* picking up the a2 value that was just stored.         */ \
    mx3b_t1 = *mx3b_p1; \
    mx3b_t2 = *mx3b_p2; \
\
    *mx3b_p1 = mx3b_t1 * mx3b_g12 + mx3b_t2 * mx3b_s12; \
    *mx3b_p2 = mx3b_t2 * mx3b_g12 - mx3b_t1 * mx3b_s12; \
\
    mx3b_p1 += ( inc_a1 ); \
    mx3b_p2 += ( inc_a2 ); \
    mx3b_p3 += ( inc_a3 ); \
  } \
}
139
/*
   MAC_Apply_G_mx3b_opz

   Applies two plane rotations (Givens rotations, going by the macro name)
   with real double-precision coefficients, in place, to three strided
   vectors of dcomplex elements. The real and imaginary parts of each
   element are rotated independently with the same coefficients.

   For each of the m_A element positions, the ( gamma23, sigma23 ) rotation
   is applied to the ( a2, a3 ) pair first:

     a2_new =  gamma23 * a2 + sigma23 * a3
     a3_new = -sigma23 * a2 + gamma23 * a3

   and the ( gamma12, sigma12 ) rotation is then applied to ( a1, a2 ),
   reading back the a2 value stored by the first step:

     a1_new =  gamma12 * a1 + sigma12 * a2_new
     a2_fin = -sigma12 * a1 + gamma12 * a2_new

   Parameters:
     m_A                number of elements processed in each vector
     gamma12, sigma12   pointers to the coefficients of the ( a1, a2 ) rotation
     gamma23, sigma23   pointers to the coefficients of the ( a2, a3 ) rotation
     a1, a2, a3         pointers to the first dcomplex element of each vector
     inc_a1 .. inc_a3   element stride used to advance each vector pointer

   Notes:
     - The four coefficients are dereferenced once, before the loop, and
       stored as double.
     - The vector pointers are copied into restrict-qualified locals, so the
       three vectors must not overlap.
     - m_A and the increments are re-evaluated on every iteration; pass
       expressions without side effects.
     - Every argument is parenthesized in the expansion, and the locals carry
       an mx3b_ prefix so they cannot capture identifiers that appear in the
       caller's argument expressions.
     - The macro expands to a brace-enclosed block, not an expression.
*/
#define MAC_Apply_G_mx3b_opz( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
  double             mx3b_g12 = *( gamma12 ); \
  double             mx3b_s12 = *( sigma12 ); \
  double             mx3b_g23 = *( gamma23 ); \
  double             mx3b_s23 = *( sigma23 ); \
  dcomplex* restrict mx3b_p1  = ( a1 ); \
  dcomplex* restrict mx3b_p2  = ( a2 ); \
  dcomplex* restrict mx3b_p3  = ( a3 ); \
  dcomplex           mx3b_t1; \
  dcomplex           mx3b_t2; \
  dcomplex           mx3b_t3; \
  int                mx3b_i; \
\
  for ( mx3b_i = 0; mx3b_i < ( m_A ); ++mx3b_i ) \
  { \
    /* Rotate the ( a2, a3 ) pair with ( gamma23, sigma23 ). */ \
    mx3b_t2 = *mx3b_p2; \
    mx3b_t3 = *mx3b_p3; \
\
    mx3b_p2->real =  mx3b_g23 * mx3b_t2.real + mx3b_s23 * mx3b_t3.real; \
    mx3b_p2->imag =  mx3b_g23 * mx3b_t2.imag + mx3b_s23 * mx3b_t3.imag; \
\
    mx3b_p3->real = -mx3b_s23 * mx3b_t2.real + mx3b_g23 * mx3b_t3.real; \
    mx3b_p3->imag = -mx3b_s23 * mx3b_t2.imag + mx3b_g23 * mx3b_t3.imag; \
\
    /* Rotate the ( a1, a2 ) pair with ( gamma12, sigma12 ), */ \
    /* picking up the a2 value that was just stored.         */ \
    mx3b_t1 = *mx3b_p1; \
    mx3b_t2 = *mx3b_p2; \
\
    mx3b_p1->real =  mx3b_g12 * mx3b_t1.real + mx3b_s12 * mx3b_t2.real; \
    mx3b_p1->imag =  mx3b_g12 * mx3b_t1.imag + mx3b_s12 * mx3b_t2.imag; \
\
    mx3b_p2->real = -mx3b_s12 * mx3b_t1.real + mx3b_g12 * mx3b_t2.real; \
    mx3b_p2->imag = -mx3b_s12 * mx3b_t1.imag + mx3b_g12 * mx3b_t2.imag; \
\
    mx3b_p1 += ( inc_a1 ); \
    mx3b_p2 += ( inc_a2 ); \
    mx3b_p3 += ( inc_a3 ); \
  } \
}
186