libflame revision_anchor
FLA_Apply_G_mx3_opt.h
Go to the documentation of this file.
/*

   Copyright (C) 2014, The University of Texas at Austin

   This file is part of libflame and is available under the 3-Clause
   BSD license, which can be found in the LICENSE file at the top-level
   directory, or at http://opensource.org/licenses/BSD-3-Clause

*/
10
/*
 * MAC_Apply_G_mx3_ops -- single-precision real variant.
 *
 * Walks three strided float vectors for m_A steps and, at each step, applies
 * two plane rotations in sequence to the current elements:
 *
 *   1. ( alpha1, alpha2 ) <- ( ga12 * alpha1 + si12 * alpha2,
 *                              ga12 * alpha2 - si12 * alpha1 )
 *   2. ( alpha2, alpha3 ) <- ( ga23 * alpha2 + si23 * alpha3,
 *                              ga23 * alpha3 - si23 * alpha2 )
 *
 * The second rotation consumes the alpha2 value produced by the first.
 *
 * Parameters:
 *   m_A               number of elements processed in each vector
 *   gamma12, sigma12  pointers to the two coefficients of the first rotation
 *   gamma23, sigma23  pointers to the two coefficients of the second rotation
 *   a1, a2, a3        pointers to the first element of each vector; the
 *                     vectors are updated in place
 *   inc_a1 .. inc_a3  element stride used to advance each vector pointer
 *
 * Usage notes:
 *   - Every argument is parenthesized inside the expansion, so expressions
 *     such as `g + 1` or `flag ? m : 0` are safe to pass.
 *   - m_A and the increments are evaluated once per iteration; pass
 *     expressions without side effects.
 *   - The vectors are accessed through restrict-qualified pointers and
 *     therefore must not overlap.
 *   - The expansion is a brace-delimited compound statement declaring the
 *     locals ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float           ga12   = *( gamma12 ); \
    float           si12   = *( sigma12 ); \
    float           ga23   = *( gamma23 ); \
    float           si23   = *( sigma23 ); \
    float* restrict alpha1 = ( a1 ); \
    float* restrict alpha2 = ( a2 ); \
    float* restrict alpha3 = ( a3 ); \
    float           temp1; \
    float           temp2; \
    float           temp3; \
    int             i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* First rotation: mixes the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Second rotation: mixes the updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
51
/*
 * MAC_Apply_G_mx3_opd -- double-precision real variant.
 *
 * Walks three strided double vectors for m_A steps and, at each step, applies
 * two plane rotations in sequence to the current elements:
 *
 *   1. ( alpha1, alpha2 ) <- ( ga12 * alpha1 + si12 * alpha2,
 *                              ga12 * alpha2 - si12 * alpha1 )
 *   2. ( alpha2, alpha3 ) <- ( ga23 * alpha2 + si23 * alpha3,
 *                              ga23 * alpha3 - si23 * alpha2 )
 *
 * The second rotation consumes the alpha2 value produced by the first.
 *
 * Parameters:
 *   m_A               number of elements processed in each vector
 *   gamma12, sigma12  pointers to the two coefficients of the first rotation
 *   gamma23, sigma23  pointers to the two coefficients of the second rotation
 *   a1, a2, a3        pointers to the first element of each vector; the
 *                     vectors are updated in place
 *   inc_a1 .. inc_a3  element stride used to advance each vector pointer
 *
 * Usage notes:
 *   - Every argument is parenthesized inside the expansion, so expressions
 *     such as `g + 1` or `flag ? m : 0` are safe to pass.
 *   - m_A and the increments are evaluated once per iteration; pass
 *     expressions without side effects.
 *   - The vectors are accessed through restrict-qualified pointers and
 *     therefore must not overlap.
 *   - The expansion is a brace-delimited compound statement declaring the
 *     locals ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double           ga12   = *( gamma12 ); \
    double           si12   = *( sigma12 ); \
    double           ga23   = *( gamma23 ); \
    double           si23   = *( sigma23 ); \
    double* restrict alpha1 = ( a1 ); \
    double* restrict alpha2 = ( a2 ); \
    double* restrict alpha3 = ( a3 ); \
    double           temp1; \
    double           temp2; \
    double           temp3; \
    int              i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* First rotation: mixes the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Second rotation: mixes the updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
92
/*
 * MAC_Apply_G_mx3_opc -- single-precision complex variant.
 *
 * Walks three strided scomplex vectors for m_A steps and, at each step,
 * applies two plane rotations in sequence to the current elements. The
 * rotation coefficients are real (float) and are applied to the .real and
 * .imag members independently:
 *
 *   1. ( alpha1, alpha2 ) <- (  ga12 * alpha1 + si12 * alpha2,
 *                              -si12 * alpha1 + ga12 * alpha2 )
 *   2. ( alpha2, alpha3 ) <- (  ga23 * alpha2 + si23 * alpha3,
 *                              -si23 * alpha2 + ga23 * alpha3 )
 *
 * The second rotation consumes the alpha2 value produced by the first.
 *
 * Parameters:
 *   m_A               number of elements processed in each vector
 *   gamma12, sigma12  pointers to the two real coefficients of the first
 *                     rotation
 *   gamma23, sigma23  pointers to the two real coefficients of the second
 *                     rotation
 *   a1, a2, a3        pointers to the first scomplex element of each vector;
 *                     the vectors are updated in place
 *   inc_a1 .. inc_a3  element stride used to advance each vector pointer
 *
 * Usage notes:
 *   - Every argument is parenthesized inside the expansion, so expressions
 *     such as `g + 1` or `flag ? m : 0` are safe to pass.
 *   - m_A and the increments are evaluated once per iteration; pass
 *     expressions without side effects.
 *   - The vectors are accessed through restrict-qualified pointers and
 *     therefore must not overlap.
 *   - The expansion is a brace-delimited compound statement declaring the
 *     locals ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float              ga12   = *( gamma12 ); \
    float              si12   = *( sigma12 ); \
    float              ga23   = *( gamma23 ); \
    float              si23   = *( sigma23 ); \
    scomplex* restrict alpha1 = ( a1 ); \
    scomplex* restrict alpha2 = ( a2 ); \
    scomplex* restrict alpha3 = ( a3 ); \
    scomplex           temp1; \
    scomplex           temp2; \
    scomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* First rotation: mixes the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Second rotation: mixes the updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
139
/*
 * MAC_Apply_G_mx3_opz -- double-precision complex variant.
 *
 * Walks three strided dcomplex vectors for m_A steps and, at each step,
 * applies two plane rotations in sequence to the current elements. The
 * rotation coefficients are real (double) and are applied to the .real and
 * .imag members independently:
 *
 *   1. ( alpha1, alpha2 ) <- (  ga12 * alpha1 + si12 * alpha2,
 *                              -si12 * alpha1 + ga12 * alpha2 )
 *   2. ( alpha2, alpha3 ) <- (  ga23 * alpha2 + si23 * alpha3,
 *                              -si23 * alpha2 + ga23 * alpha3 )
 *
 * The second rotation consumes the alpha2 value produced by the first.
 *
 * Parameters:
 *   m_A               number of elements processed in each vector
 *   gamma12, sigma12  pointers to the two real coefficients of the first
 *                     rotation
 *   gamma23, sigma23  pointers to the two real coefficients of the second
 *                     rotation
 *   a1, a2, a3        pointers to the first dcomplex element of each vector;
 *                     the vectors are updated in place
 *   inc_a1 .. inc_a3  element stride used to advance each vector pointer
 *
 * Usage notes:
 *   - Every argument is parenthesized inside the expansion, so expressions
 *     such as `g + 1` or `flag ? m : 0` are safe to pass.
 *   - m_A and the increments are evaluated once per iteration; pass
 *     expressions without side effects.
 *   - The vectors are accessed through restrict-qualified pointers and
 *     therefore must not overlap.
 *   - The expansion is a brace-delimited compound statement declaring the
 *     locals ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i;
 *     arguments must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double             ga12   = *( gamma12 ); \
    double             si12   = *( sigma12 ); \
    double             ga23   = *( gamma23 ); \
    double             si23   = *( sigma23 ); \
    dcomplex* restrict alpha1 = ( a1 ); \
    dcomplex* restrict alpha2 = ( a2 ); \
    dcomplex* restrict alpha3 = ( a3 ); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    dcomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* First rotation: mixes the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Second rotation: mixes the updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
186