libflame revision_anchor
Functions
FLA_CAQR2_UT_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_CAQR2_UT_blk_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl)
 
FLA_Error FLA_CAQR2_UT_blk_var2 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl)
 
FLA_Error FLA_CAQR2_UT_unb_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T)
 
FLA_Error FLA_CAQR2_UT_opt_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T)
 
FLA_Error FLA_CAQR2_UT_ops_var1 (int m_UT, int m_D, float *U, int rs_U, int cs_U, float *D, int rs_D, int cs_D, float *T, int rs_T, int cs_T)
 
FLA_Error FLA_CAQR2_UT_opd_var1 (int m_UT, int m_D, double *U, int rs_U, int cs_U, double *D, int rs_D, int cs_D, double *T, int rs_T, int cs_T)
 
FLA_Error FLA_CAQR2_UT_opc_var1 (int m_UT, int m_D, scomplex *U, int rs_U, int cs_U, scomplex *D, int rs_D, int cs_D, scomplex *T, int rs_T, int cs_T)
 
FLA_Error FLA_CAQR2_UT_opz_var1 (int m_UT, int m_D, dcomplex *U, int rs_U, int cs_U, dcomplex *D, int rs_D, int cs_D, dcomplex *T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_CAQR2_UT_blk_var1()

FLA_Error FLA_CAQR2_UT_blk_var1 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T,
fla_caqr2ut_t * cntl 
)
15{
16 FLA_Obj UTL, UTR, U00, U01, U02,
17 UBL, UBR, U10, U11, U12,
18 U20, U21, U22;
19
20 FLA_Obj DTL, DTR, D00, D01, D02,
21 DBL, DBR, D10, D11, D12,
22 D20, D21, D22;
23
24 FLA_Obj TL, TR, T0, T1, W12;
25
26 FLA_Obj D1;
27
29
31
32 dim_t b_alg, b;
33 dim_t m_DT;
34
35 // Query the algorithmic blocksize by inspecting the length of T.
36 b_alg = FLA_Obj_length( T );
37
38 // Begin partitioning diagonally through D with m - n rows above
39 // the diagonal.
40 m_DT = FLA_Obj_length( D ) - FLA_Obj_width( D );
41
42 FLA_Part_2x2( U, &UTL, &UTR,
43 &UBL, &UBR, 0, 0, FLA_TL );
44
45 FLA_Part_2x2( D, &DTL, &DTR,
46 &DBL, &DBR, m_DT, 0, FLA_TL );
47
48 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
49
50 while ( FLA_Obj_min_dim( UBR ) > 0 ){
51
52 b = min( b_alg, FLA_Obj_min_dim( UBR ) );
53
54 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &U01, &U02,
55 /* ************* */ /* ******************** */
56 &U10, /**/ &U11, &U12,
57 UBL, /**/ UBR, &U20, /**/ &U21, &U22,
58 b, b, FLA_BR );
59
60 FLA_Repart_2x2_to_3x3( DTL, /**/ DTR, &D00, /**/ &D01, &D02,
61 /* ************* */ /* ******************** */
62 &D10, /**/ &D11, &D12,
63 DBL, /**/ DBR, &D20, /**/ &D21, &D22,
64 b, b, FLA_BR );
65
66 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
67 b, FLA_RIGHT );
68
69 /*------------------------------------------------------------*/
70
71 // T1T = FLA_Top_part( T1, b );
72
74 &T2B, b, FLA_TOP );
75
77 D11, &D1 );
78
79 // [ U11, ...
80 // D1, T1 ] = FLA_CAQR2_UT( U11
81 // D1, T1T );
82
84 D1, T1T,
85 FLA_Cntl_sub_caqr2ut( cntl ) );
86
87
88 if ( FLA_Obj_width( U12 ) > 0 )
89 {
90 // W12T = FLA_Top_part( W12, b );
91
93 &W12B, b, FLA_TOP );
94
95 // W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 );
96 // = inv( triu( T1T ) )' * ( U12 + D01' * D02 + D11' * D12 );
97
99 FLA_Cntl_sub_copy( cntl ) );
100
103 FLA_ONE, D11, W12T,
104 FLA_Cntl_sub_trmm1( cntl ) );
105
108 FLA_Cntl_sub_gemm1( cntl ) );
109
111 FLA_Cntl_sub_axpy1( cntl ) );
112
115 FLA_ONE, T1T, W12T,
116 FLA_Cntl_sub_trsm( cntl ) );
117
118 // U12 = U12 - W12T;
119 // D2 = D2 - D1 * W12T;
120 // => D02 = D02 - D01 * W12T;
121 // D12 = D12 - D11 * W12T;
122
124 FLA_Cntl_sub_axpy2( cntl ) );
125
128 FLA_Cntl_sub_gemm2( cntl ) );
129
132 FLA_ONE, D11, W12T,
133 FLA_Cntl_sub_trmm2( cntl ) );
134
136 FLA_Cntl_sub_axpy3( cntl ) );
137 }
138
139 /*------------------------------------------------------------*/
140
141 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, U01, /**/ U02,
142 U10, U11, /**/ U12,
143 /* ************** */ /* ****************** */
144 &UBL, /**/ &UBR, U20, U21, /**/ U22,
145 FLA_TL );
146
147 FLA_Cont_with_3x3_to_2x2( &DTL, /**/ &DTR, D00, D01, /**/ D02,
148 D10, D11, /**/ D12,
149 /* ************** */ /* ****************** */
150 &DBL, /**/ &DBR, D20, D21, /**/ D22,
151 FLA_TL );
152
153 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
154 FLA_LEFT );
155
156 }
157
158 return FLA_SUCCESS;
159}
FLA_Error FLA_Axpy_internal(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t *cntl)
Definition FLA_Axpy_internal.c:16
FLA_Error FLA_CAQR2_UT_internal(FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl)
Definition FLA_CAQR2_UT_internal.c:16
FLA_Error FLA_Copy_internal(FLA_Obj A, FLA_Obj B, fla_copy_t *cntl)
Definition FLA_Copy_internal.c:16
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition FLA_Gemm_internal.c:16
FLA_Error FLA_Trmm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t *cntl)
Definition FLA_Trmm_internal.c:16
FLA_Error FLA_Trsm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition FLA_Trsm_internal.c:16
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition FLA_View.c:541
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition FLA_Query.c:153
unsigned long dim_t
Definition FLA_type_defs.h:71
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Axpy_internal(), FLA_CAQR2_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_internal(), FLA_Gemm_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Trmm_internal(), FLA_Trsm_internal(), and i.

Referenced by FLA_CAQR2_UT_internal().

◆ FLA_CAQR2_UT_blk_var2()

FLA_Error FLA_CAQR2_UT_blk_var2 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T,
fla_caqr2ut_t * cntl 
)
15{
16 FLA_Obj DT, D0,
17 DB, D1,
18 D2;
19
20 FLA_Obj TT, T0,
21 TB, T1,
22 T2;
23
24 dim_t b;
25
26 FLA_Part_2x1( D, &DT,
27 &DB, 0, FLA_TOP );
28
29 FLA_Part_2x1( T, &TT,
30 &TB, 0, FLA_TOP );
31
32 while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) ){
33
35
37 /* ** */ /* ****** */
38 &D1,
39 DB, &D2, b, FLA_BOTTOM );
40
42 /* ** */ /* ****** */
43 &T1,
44 TB, &T2, b, FLA_BOTTOM );
45
46 /*------------------------------------------------------------*/
47
48 // [ U, ...
49 // D1, T ] = FLA_CAQR2_UT( U
50 // D1, T1 );
51
53 D1, T1,
54 FLA_Cntl_sub_caqr2ut( cntl ) );
55
56 /*------------------------------------------------------------*/
57
59 D1,
60 /* ** */ /* ****** */
61 &DB, D2, FLA_TOP );
62
64 T1,
65 /* ** */ /* ****** */
66 &TB, T2, FLA_TOP );
67 }
68
69 return FLA_SUCCESS;
70}
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition FLA_Blocksize.c:234

References FLA_CAQR2_UT_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Determine_blocksize(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), and i.

Referenced by FLA_CAQR2_UT_internal().

◆ FLA_CAQR2_UT_opc_var1()

FLA_Error FLA_CAQR2_UT_opc_var1 ( int  m_UT,
int  m_D,
scomplex * U,
int  rs_U,
int  cs_U,
scomplex * D,
int  rs_D,
int  cs_D,
scomplex * T,
int  rs_T,
int  cs_T 
)
270{
271 scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
272 int i, j;
273 int m_DT = m_D - mn_UT;
274
275 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
276 {
277 scomplex* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
278 scomplex* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
279
280 scomplex* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
281 scomplex* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
282 scomplex* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
283
284 scomplex* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
285 scomplex* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
286
287 scomplex* d1B = d1 + (m_DT)*rs_D;
288 scomplex* D00B = D00 + (m_DT)*rs_D;
289
290 int m_behind = i;
291 int n_behind = j;
292 int mn_ahead = mn_UT - j - 1;
293
294 //------------------------------------------------------------//
295
296 // FLA_Househ2_UT( FLA_LEFT,
297 // upsilon11,
298 // d1, tau11 );
300 upsilon11,
301 d1, rs_D,
302 tau11 );
303
304 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
305 // D2 );
307 mn_ahead,
308 tau11,
309 d1, rs_D,
310 u12t, cs_U,
311 D2, rs_D, cs_D );
312
313 // FLA_Copy_external( d01B, t01 );
314 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
315 // D00B, t01 );
316 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
318 n_behind,
319 d1B, rs_D,
320 t01, rs_T );
324 n_behind,
325 D00B, rs_D, cs_D,
326 t01, rs_T );
329 m_DT,
330 n_behind,
331 buff_1,
332 D00, rs_D, cs_D,
333 d1, rs_D,
334 buff_1,
335 t01, rs_T );
336
337 //------------------------------------------------------------//
338
339 }
340
341 return FLA_SUCCESS;
342}
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition FLA_Househ2_UT.c:390
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copyv.c:49
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_ctrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition bl1_trmv.c:99
@ BLIS1_UPPER_TRIANGULAR
Definition blis_type_defs.h:63
@ BLIS1_NONUNIT_DIAG
Definition blis_type_defs.h:74
@ BLIS1_CONJ_TRANSPOSE
Definition blis_type_defs.h:57
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_ccopyv(), bl1_cgemv(), bl1_ctrmv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_ONE, and i.

Referenced by FLA_CAQR2_UT_opt_var1().

◆ FLA_CAQR2_UT_opd_var1()

FLA_Error FLA_CAQR2_UT_opd_var1 ( int  m_UT,
int  m_D,
double * U,
int  rs_U,
int  cs_U,
double * D,
int  rs_D,
int  cs_D,
double * T,
int  rs_T,
int  cs_T 
)
189{
190 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
191 int i, j;
192 int m_DT = m_D - mn_UT;
193
194 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
195 {
196 double* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
197 double* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
198
199 double* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
200 double* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
201 double* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
202
203 double* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
204 double* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
205
206 double* d1B = d1 + (m_DT)*rs_D;
207 double* D00B = D00 + (m_DT)*rs_D;
208
209 int m_behind = i;
210 int n_behind = j;
211 int mn_ahead = mn_UT - j - 1;
212
213 //------------------------------------------------------------//
214
215 // FLA_Househ2_UT( FLA_LEFT,
216 // upsilon11,
217 // d1, tau11 );
219 upsilon11,
220 d1, rs_D,
221 tau11 );
222
223 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
224 // D2 );
226 mn_ahead,
227 tau11,
228 d1, rs_D,
229 u12t, cs_U,
230 D2, rs_D, cs_D );
231
232 // FLA_Copy_external( d01B, t01 );
233 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
234 // D00B, t01 );
235 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
237 n_behind,
238 d1B, rs_D,
239 t01, rs_T );
243 n_behind,
244 D00B, rs_D, cs_D,
245 t01, rs_T );
248 m_DT,
249 n_behind,
250 buff_1,
251 D00, rs_D, cs_D,
252 d1, rs_D,
253 buff_1,
254 t01, rs_T );
255
256 //------------------------------------------------------------//
257
258 }
259
260 return FLA_SUCCESS;
261}
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:195
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition FLA_Househ2_UT.c:274
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition bl1_copyv.c:42
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dtrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition bl1_trmv.c:56

References bl1_dcopyv(), bl1_dgemv(), bl1_dtrmv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_ONE, and i.

Referenced by FLA_CAQR2_UT_opt_var1().

◆ FLA_CAQR2_UT_ops_var1()

FLA_Error FLA_CAQR2_UT_ops_var1 ( int  m_UT,
int  m_D,
float * U,
int  rs_U,
int  cs_U,
float * D,
int  rs_D,
int  cs_D,
float * T,
int  rs_T,
int  cs_T 
)
108{
109 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
110 int i, j;
111 int m_DT = m_D - mn_UT;
112
113 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
114 {
115 float* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
116 float* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
117
118 float* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
119 float* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
120 float* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
121
122 float* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
123 float* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
124
125 float* d1B = d1 + (m_DT)*rs_D;
126 float* D00B = D00 + (m_DT)*rs_D;
127
128 int m_behind = i;
129 int n_behind = j;
130 int mn_ahead = mn_UT - j - 1;
131
132 //------------------------------------------------------------//
133
134 // FLA_Househ2_UT( FLA_LEFT,
135 // upsilon11,
136 // d1, tau11 );
138 upsilon11,
139 d1, rs_D,
140 tau11 );
141
142 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
143 // D2 );
145 mn_ahead,
146 tau11,
147 d1, rs_D,
148 u12t, cs_U,
149 D2, rs_D, cs_D );
150
151 // FLA_Copy_external( d01B, t01 );
152 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
153 // D00B, t01 );
154 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
156 n_behind,
157 d1B, rs_D,
158 t01, rs_T );
162 n_behind,
163 D00B, rs_D, cs_D,
164 t01, rs_T );
167 m_DT,
168 n_behind,
169 buff_1,
170 D00, rs_D, cs_D,
171 d1, rs_D,
172 buff_1,
173 t01, rs_T );
174
175 //------------------------------------------------------------//
176
177 }
178
179 return FLA_SUCCESS;
180}
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition FLA_Househ2_UT.c:160
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition bl1_copyv.c:35
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_strmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition bl1_trmv.c:13

References bl1_scopyv(), bl1_sgemv(), bl1_strmv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_ONE, and i.

Referenced by FLA_CAQR2_UT_opt_var1().

◆ FLA_CAQR2_UT_opt_var1()

FLA_Error FLA_CAQR2_UT_opt_var1 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T 
)
15{
16 FLA_Datatype datatype;
17 int mn_UT, m_D;
18 int rs_U, cs_U;
19 int rs_D, cs_D;
20 int rs_T, cs_T;
21
22 datatype = FLA_Obj_datatype( U );
23
25 m_D = FLA_Obj_length( D );
26
33
34
35 switch ( datatype )
36 {
37 case FLA_FLOAT:
38 {
39 float* buff_U = FLA_FLOAT_PTR( U );
40 float* buff_D = FLA_FLOAT_PTR( D );
41 float* buff_T = FLA_FLOAT_PTR( T );
42
44 m_D,
47 buff_T, rs_T, cs_T );
48
49 break;
50 }
51
52 case FLA_DOUBLE:
53 {
54 double* buff_U = FLA_DOUBLE_PTR( U );
55 double* buff_D = FLA_DOUBLE_PTR( D );
56 double* buff_T = FLA_DOUBLE_PTR( T );
57
59 m_D,
62 buff_T, rs_T, cs_T );
63
64 break;
65 }
66
67 case FLA_COMPLEX:
68 {
72
74 m_D,
77 buff_T, rs_T, cs_T );
78
79 break;
80 }
81
83 {
87
89 m_D,
92 buff_T, rs_T, cs_T );
93
94 break;
95 }
96 }
97
98 return FLA_SUCCESS;
99}
FLA_Error FLA_CAQR2_UT_opd_var1(int mn_UT, int m_D, double *buff_U, int rs_U, int cs_U, double *buff_D, int rs_D, int cs_D, double *buff_T, int rs_T, int cs_T)
Definition FLA_CAQR2_UT_opt_var1.c:184
FLA_Error FLA_CAQR2_UT_opz_var1(int mn_UT, int m_D, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_D, int rs_D, int cs_D, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_CAQR2_UT_opt_var1.c:346
FLA_Error FLA_CAQR2_UT_opc_var1(int mn_UT, int m_D, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_D, int rs_D, int cs_D, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_CAQR2_UT_opt_var1.c:265
FLA_Error FLA_CAQR2_UT_ops_var1(int mn_UT, int m_D, float *buff_U, int rs_U, int cs_U, float *buff_D, int rs_D, int cs_D, float *buff_T, int rs_T, int cs_T)
Definition FLA_CAQR2_UT_opt_var1.c:103
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_CAQR2_UT_opc_var1(), FLA_CAQR2_UT_opd_var1(), FLA_CAQR2_UT_ops_var1(), FLA_CAQR2_UT_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and i.

Referenced by FLA_CAQR2_UT_internal().

◆ FLA_CAQR2_UT_opz_var1()

FLA_Error FLA_CAQR2_UT_opz_var1 ( int  m_UT,
int  m_D,
dcomplex * U,
int  rs_U,
int  cs_U,
dcomplex * D,
int  rs_D,
int  cs_D,
dcomplex * T,
int  rs_T,
int  cs_T 
)
351{
352 dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
353 int i, j;
354 int m_DT = m_D - mn_UT;
355
356 for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
357 {
358 dcomplex* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
359 dcomplex* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
360
361 dcomplex* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
362 dcomplex* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
363 dcomplex* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
364
365 dcomplex* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
366 dcomplex* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
367
368 dcomplex* d1B = d1 + (m_DT)*rs_D;
369 dcomplex* D00B = D00 + (m_DT)*rs_D;
370
371 int m_behind = i;
372 int n_behind = j;
373 int mn_ahead = mn_UT - j - 1;
374
375 //------------------------------------------------------------//
376
377 // FLA_Househ2_UT( FLA_LEFT,
378 // upsilon11,
379 // d1, tau11 );
381 upsilon11,
382 d1, rs_D,
383 tau11 );
384
385 // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
386 // D2 );
388 mn_ahead,
389 tau11,
390 d1, rs_D,
391 u12t, cs_U,
392 D2, rs_D, cs_D );
393
394 // FLA_Copy_external( d01B, t01 );
395 // FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
396 // D00B, t01 );
397 // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
399 n_behind,
400 d1B, rs_D,
401 t01, rs_T );
405 n_behind,
406 D00B, rs_D, cs_D,
407 t01, rs_T );
410 m_DT,
411 n_behind,
412 buff_1,
413 D00, rs_D, cs_D,
414 d1, rs_D,
415 buff_1,
416 t01, rs_T );
417
418 //------------------------------------------------------------//
419
420 }
421
422 return FLA_SUCCESS;
423}
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:343
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition FLA_Househ2_UT.c:521
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copyv.c:63
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_ztrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition bl1_trmv.c:177

References bl1_zcopyv(), bl1_zgemv(), bl1_ztrmv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_ONE, and i.

Referenced by FLA_CAQR2_UT_opt_var1().

◆ FLA_CAQR2_UT_unb_var1()

FLA_Error FLA_CAQR2_UT_unb_var1 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T 
)
15{
16 FLA_Obj UTL, UTR, U00, u01, U02,
18 U20, u21, U22;
19
20 FLA_Obj DTL, DTR, D00, d01, D02,
22 D20, d21, D22;
23
24 FLA_Obj TTL, TTR, T00, t01, T02,
25 TBL, TBR, t10t, tau11, t12t,
26 T20, t21, T22;
27
28 FLA_Obj d1, D2;
29
31 d01B;
32
34 D00B;
35
36 dim_t m_DT;
37
38 // Begin partitioning diagonally through D with m - n rows above
39 // the diagonal.
40 m_DT = FLA_Obj_length( D ) - FLA_Obj_width( D );
41
42 FLA_Part_2x2( U, &UTL, &UTR,
43 &UBL, &UBR, 0, 0, FLA_TL );
44
45 FLA_Part_2x2( D, &DTL, &DTR,
46 &DBL, &DBR, m_DT, 0, FLA_TL );
47
48 FLA_Part_2x2( T, &TTL, &TTR,
49 &TBL, &TBR, 0, 0, FLA_TL );
50
51 while ( FLA_Obj_min_dim( UBR ) > 0 ){
52
53 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &u01, &U02,
54 /* ************* */ /* ************************** */
55 &u10t, /**/ &upsilon11, &u12t,
56 UBL, /**/ UBR, &U20, /**/ &u21, &U22,
57 1, 1, FLA_BR );
58
59 FLA_Repart_2x2_to_3x3( DTL, /**/ DTR, &D00, /**/ &d01, &D02,
60 /* ************* */ /* ************************** */
61 &d10t, /**/ &delta11, &d12t,
62 DBL, /**/ DBR, &D20, /**/ &d21, &D22,
63 1, 1, FLA_BR );
64
65 FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
66 /* ************* */ /* ************************ */
67 &t10t, /**/ &tau11, &t12t,
68 TBL, /**/ TBR, &T20, /**/ &t21, &T22,
69 1, 1, FLA_BR );
70
71 /*------------------------------------------------------------*/
72
74 delta11, &d1 );
75
77 d12t, &D2 );
78
79 // Compute tau11 and u2 from upsilon11 and d1 such that tau11 and u2
80 // determine a Householder transform H such that applying H from the
81 // left to the column vector consisting of upsilon11 and d1 annihilates
82 // the entries in d1 (and updates upsilon11).
85 d1, tau11 );
86
87 // / u12t \ = H / u12t \
88 // \ D2 / \ D2 /
89 //
90 // where H is formed from tau11 and d1.
92 D2 );
93
95 &d01B, m_DT, FLA_TOP );
96
98 &D00B, m_DT, FLA_TOP );
99
100 // t01 = D00' * d01;
101 // = D00T' * d01T + triu( D00B )' * d01B;
104 D00B, t01 );
106
107 /*------------------------------------------------------------*/
108
109 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, u01, /**/ U02,
110 u10t, upsilon11, /**/ u12t,
111 /* ************** */ /* ************************ */
112 &UBL, /**/ &UBR, U20, u21, /**/ U22,
113 FLA_TL );
114
115 FLA_Cont_with_3x3_to_2x2( &DTL, /**/ &DTR, D00, d01, /**/ D02,
116 d10t, delta11, /**/ d12t,
117 /* ************** */ /* ************************ */
118 &DBL, /**/ &DBR, D20, d21, /**/ D22,
119 FLA_TL );
120
121 FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
122 t10t, tau11, /**/ t12t,
123 /* ************** */ /* ********************** */
124 &TBL, /**/ &TBR, T20, t21, /**/ T22,
125 FLA_TL );
126 }
127
128 return FLA_SUCCESS;
129}
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy_external.c:13
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemv_external.c:13
FLA_Error FLA_Trmv_external(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x)
Definition FLA_Trmv_external.c:13
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition FLA_Apply_H2_UT.c:13
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition FLA_Househ2_UT.c:16

References FLA_Apply_H2_UT(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Merge_2x1(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Trmv_external(), and i.

Referenced by FLA_CAQR2_UT_internal().