libflame revision_anchor
Functions
FLA_QR_UT.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_QR_UT (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_QR_UT_internal (FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
 
FLA_Error FLA_QR_UT_copy_internal (FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
 
FLA_Error FLA_QR_UT_create_T (FLA_Obj A, FLA_Obj *T)
 
FLA_Error FLA_QR_UT_recover_tau (FLA_Obj T, FLA_Obj tau)
 
FLA_Error FLA_QR_UT_solve (FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X)
 
FLA_Error FLASH_QR_UT (FLA_Obj A, FLA_Obj TW)
 
FLA_Error FLASH_QR_UT_create_hier_matrices (FLA_Obj A_flat, dim_t depth, dim_t *b_flash, FLA_Obj *A, FLA_Obj *TW)
 
FLA_Error FLASH_QR_UT_solve (FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X)
 
FLA_Error FLA_QR_UT_form_Q (FLA_Obj A, FLA_Obj T, FLA_Obj Q)
 
FLA_Error FLA_QR_UT_form_Q_blk_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj W)
 
FLA_Error FLA_QR_UT_form_Q_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_QR_UT_form_Q_ops_var1 (int m_A, int n_AT, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_form_Q_opd_var1 (int m_A, int n_AT, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_form_Q_opc_var1 (int m_A, int n_AT, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_QR_UT_form_Q_opz_var1 (int m_A, int n_AT, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_QR_UT()

FLA_Error FLA_QR_UT ( FLA_Obj  A,
FLA_Obj  T 
)
16{
18
19 // Check parameters.
22
23 // Invoke FLA_QR_UT_internal() with the standard control tree.
24 //r_val = FLA_QR_UT_internal( A, T, fla_qrut_cntl2 );
26
27 return r_val;
28}
fla_qrut_t * fla_qrut_cntl_leaf
Definition FLA_QR_UT_cntl_init.c:16
FLA_Error FLA_QR_UT_internal(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition FLA_QR_UT_internal.c:17
FLA_Error FLA_QR_UT_check(FLA_Obj A, FLA_Obj T)
Definition FLA_QR_UT_check.c:13
unsigned int FLA_Check_error_level(void)
Definition FLA_Check.c:18
int FLA_Error
Definition FLA_type_defs.h:47
int i
Definition bl1_axmyv2.c:145

◆ FLA_QR_UT_copy_internal()

FLA_Error FLA_QR_UT_copy_internal ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  U,
fla_qrut_t *cntl 
)
14{
16
19
21 {
22 // Enqueue task.
26 NULL );
27 }
28 else
29 {
30 // Execute task immediately.
34 NULL );
35 }
36
37 return r_val;
38}
FLA_Bool FLASH_Queue_get_enabled(void)
Definition FLASH_Queue.c:171
FLA_Error FLA_QR_UT_copy_internal_check(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
Definition FLA_QR_UT_copy_internal_check.c:13
FLA_Error FLA_QR_UT_copy_task(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
Definition FLA_QR_UT_copy_task.c:15

References FLA_Check_error_level(), FLA_QR_UT_copy_internal_check(), FLA_QR_UT_copy_task(), FLASH_Queue_get_enabled(), and i.

Referenced by FLA_QR_UT_inc_blk_var2().

◆ FLA_QR_UT_create_T()

FLA_Error FLA_QR_UT_create_T ( FLA_Obj  A,
FLA_Obj *T 
)
14{
15 FLA_Datatype datatype;
16 dim_t b_alg, k;
18
19 // Query the datatype of A.
20 datatype = FLA_Obj_datatype( A );
21
22 // Query the blocksize from the library.
24
25 // Scale the blocksize by a pre-set global constant.
26 b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_QR_INNER_TO_OUTER_B_RATIO );
27
28 // Adjust the blocksize with respect to the min-dim of A.
30
31 // Query the width of A.
32 k = FLA_Obj_width( A );
33
34 // Figure out whether T should be row-major or column-major.
35 if ( FLA_Obj_row_stride( A ) == 1 )
36 {
37 rs_T = 1;
38 cs_T = b_alg;
39 }
40 else // if ( FLA_Obj_col_stride( A ) == 1 )
41 {
42 rs_T = k;
43 cs_T = 1;
44 }
45
46 // Create a b_alg x k matrix to hold the block Householder transforms that
47 // will be accumulated within the QR factorization algorithm.
48 FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, T );
49
50 return FLA_SUCCESS;
51}
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition FLA_Obj.c:55
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Query_blocksize(FLA_Datatype dt, FLA_Dimension dim)
Definition FLA_Blocksize.c:161
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition FLA_Query.c:153
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
unsigned long dim_t
Definition FLA_type_defs.h:71

References FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Query_blocksize(), and i.

Referenced by FLA_Random_unitary_matrix(), FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().

◆ FLA_QR_UT_form_Q()

FLA_Error FLA_QR_UT_form_Q ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  Q 
)
14{
17 QBL, QBR;
18 FLA_Obj W;
19 dim_t b;
20
23
24 if ( FLA_Obj_is_overlapped( A, Q ) == FALSE )
25 {
26 // If A and Q are different objects, Q is explicitly formed with A.
27
28 // Set Q to identity
30
31 // Q = H_{0} H_{1} ... H_{k-1}
35 A, T, W, Q );
36 FLA_Obj_free( &W );
37
38 }
39 else
40 {
41 // If A and Q are the same objects, Q is formed in-place.
42 // - even if A and Q have the same base, they may have different
43 // dimensions.
44 // - width of T controls the loop in FLA_QR_UT_form_Q_blk_var1.
45
46 // Zero out the upper triangle of Q.
48
49 // Adjust T w.r.t A; W is a place holder.
50 if ( FLA_Obj_width( T ) > FLA_Obj_width( A ) )
51 FLA_Part_1x2( T, &T, &W,
53 FLA_LEFT );
54
55 // Zero out the lower triangle of QBR
56 if ( FLA_Obj_width( Q ) > FLA_Obj_width( T ) )
57 {
58 b = FLA_Obj_width( T );
59 FLA_Part_2x2( Q, &QTL, &QTR,
60 &QBL, &QBR, b, b, FLA_TL );
62 }
63
64 // Set the diagonal to one.
66
67 // Create workspace for applying the block Householder transforms.
69
70 // Overwrite Q, which currently contains Householder vectors in the
71 // strictly lower triangle and identity in the upper triangle, with
72 // the unitary matrix associated with those Householder transforms.
74
75 // Free the temporary workspace.
76 FLA_Obj_free( &W );
77 }
78 /*
79 FLA_Apply_Q_UT_create_workspace( T, Q, &W );
80 FLA_Set_to_identity( Q );
81 FLA_Apply_Q_UT( FLA_LEFT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
82 A, T, W, Q );
83 FLA_Obj_free( &W );
84 FLA_Obj_show( "Q", Q, "%8.1e %8.1e ", "" );
85 */
86
87 return r_val;
88}
FLA_Error FLA_Apply_Q_UT_create_workspace_side(FLA_Side side, FLA_Obj T, FLA_Obj B, FLA_Obj *W)
Definition FLA_Apply_Q_UT_create_workspace.c:28
FLA_Error FLA_QR_UT_form_Q_blk_var1(FLA_Obj A, FLA_Obj T, FLA_Obj W)
Definition FLA_QR_UT_form_Q.c:90
FLA_Error FLA_QR_UT_form_Q_check(FLA_Obj A, FLA_Obj T, FLA_Obj Q)
Definition FLA_QR_UT_form_Q_check.c:13
FLA_Obj FLA_ZERO
Definition FLA_Init.c:20
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition FLA_Apply_Q_UT.c:16
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Bool FLA_Obj_is_overlapped(FLA_Obj A, FLA_Obj B)
Definition FLA_Query.c:488
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition FLA_Obj.c:588
FLA_Error FLA_Setr(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A)
Definition FLA_Setr.c:13
FLA_Error FLA_Set_diag(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Set_diag.c:13
FLA_Error FLA_Set_to_identity(FLA_Obj A)
Definition FLA_Set_to_identity.c:13
Definition FLA_type_defs.h:159

References FLA_Apply_Q_UT(), FLA_Apply_Q_UT_create_workspace_side(), FLA_Check_error_level(), FLA_Obj_free(), FLA_Obj_is_overlapped(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR_UT_form_Q_blk_var1(), FLA_QR_UT_form_Q_check(), FLA_Set_diag(), FLA_Set_to_identity(), FLA_Setr(), FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_form_U_ext(), FLA_LQ_UT_form_Q(), FLA_Random_unitary_matrix(), FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), FLA_Svd_uv_unb_var2(), and FLA_Tridiag_UT_form_Q().

◆ FLA_QR_UT_form_Q_blk_var1()

FLA_Error FLA_QR_UT_form_Q_blk_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  W 
)
91{
93 ABL, ABR, A10, A11, A12,
94 A20, A21, A22;
95
96 FLA_Obj TL, TR, T0, T1, T2;
97
99 T2B;
100
101 FLA_Obj WTL, WTR,
102 WBL, WBR;
103
104 FLA_Obj AB1, AB2;
105
106 dim_t b, b_alg;
107 dim_t m_BR, n_BR;
108
110
111
112 // If A is wider than T, then we need to position ourselves carefully
113 // within the matrix for the initial partitioning.
114 if ( FLA_Obj_width( A ) > FLA_Obj_width( T ) )
115 {
118 }
119 else
120 {
122 n_BR = 0;
123 }
124
125 FLA_Part_2x2( A, &ATL, &ATR,
126 &ABL, &ABR, m_BR, n_BR, FLA_BR );
127
128 FLA_Part_1x2( T, &TL, &TR, 0, FLA_RIGHT );
129
130 while ( /* FLA_Obj_min_dim( ATL ) > 0 && */ FLA_Obj_width( TL ) > 0 )
131 {
132 b = min( b_alg, FLA_Obj_min_dim( ATL ) );
133
134 // Since T was filled from left to right, and since we need to access them
135 // in reverse order, we need to handle the case where the last block is
136 // smaller than the other b x b blocks.
137 if ( FLA_Obj_width( TR ) == 0 && FLA_Obj_width( T ) % b_alg > 0 )
138 b = FLA_Obj_width( T ) % b_alg;
139
140 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
141 &A10, &A11, /**/ &A12,
142 /* ************* */ /* ******************** */
143 ABL, /**/ ABR, &A20, &A21, /**/ &A22,
144 b, b, FLA_TL );
145
146 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, &T1, /**/ &T2,
147 b, FLA_LEFT );
148
149 /*------------------------------------------------------------*/
150
152 &T2B, b, FLA_TOP );
153
154 FLA_Part_2x2( W, &WTL, &WTR,
155 &WBL, &WBR, b, FLA_Obj_width( A12 ), FLA_TL );
156
157 // Use an unblocked algorithm for the first (or only) block.
158 if ( FLA_Obj_length( ABR ) == 0 )
159 {
161 }
162 else
163 {
165 A21, &AB1 );
167 A22, &AB2 );
168
169 // Apply the block Householder transforms to A12 and A22.
171 AB1, T1T, WTL, AB2 );
172
173 // Apply H to the current block panel consisting of A11 and A21.
175 }
176
177 /*------------------------------------------------------------*/
178
179 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
180 /* ************** */ /* ****************** */
181 A10, /**/ A11, A12,
182 &ABL, /**/ &ABR, A20, /**/ A21, A22,
183 FLA_BR );
184
185 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, /**/ T1, T2,
186 FLA_RIGHT );
187 }
188
189 return FLA_SUCCESS;
190}
FLA_Error FLA_QR_UT_form_Q_opt_var1(FLA_Obj A, FLA_Obj T)
Definition FLA_QR_UT_form_Q.c:193
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition FLA_View.c:541

References FLA_Apply_Q_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Merge_2x1(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_form_Q_opt_var1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_QR_UT_form_Q().

◆ FLA_QR_UT_form_Q_opc_var1()

FLA_Error FLA_QR_UT_form_Q_opc_var1 ( int  m_A,
int  n_AT,
scomplex *buff_A,
int  rs_A,
int  cs_A,
scomplex *buff_T,
int  rs_T,
int  cs_T 
)
373{
374 scomplex zero = bl1_c0();
375 scomplex one = bl1_c1();
376 int min_m_n = min( m_A, n_A );
377 int i;
378
379 for ( i = min_m_n - 1; i >= 0; --i )
380 {
381 //scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
382 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
383 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
384 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
385 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
386
387 scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
388
390
391 //int m_behind = i;
392 int n_ahead = n_A - i - 1;
393 int m_ahead = m_A - i - 1;
394
396 n_ahead,
397 tau11,
398 a21, rs_A,
399 a12t, cs_A,
400 A22, rs_A, cs_A );
401
402 minus_inv_tau11.real = -one.real / tau11->real;
403 minus_inv_tau11.imag = zero.imag;
404
405 alpha11->real = one.real + minus_inv_tau11.real;
406 alpha11->imag = zero.imag;
407
409 m_ahead,
411 a21, rs_A );
412
413 // Not necessary if upper triangle of A is initialized to identity.
414 //bl1_csetv( m_behind,
415 // &zero,
416 // a01, rs_A );
417 }
418
419 return FLA_SUCCESS;
420}
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:269
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:46
scomplex bl1_c1(void)
Definition bl1_constants.c:61
scomplex bl1_c0(void)
Definition bl1_constants.c:125
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_c0(), bl1_c1(), bl1_cscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), and i.

Referenced by FLA_QR_UT_form_Q_opt_var1().

◆ FLA_QR_UT_form_Q_opd_var1()

FLA_Error FLA_QR_UT_form_Q_opd_var1 ( int  m_A,
int  n_AT,
double *buff_A,
int  rs_A,
int  cs_A,
double *buff_T,
int  rs_T,
int  cs_T 
)
322{
323 double one = bl1_d1();
324 int min_m_n = min( m_A, n_A );
325 int i;
326
327 for ( i = min_m_n - 1; i >= 0; --i )
328 {
329 //double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
330 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
331 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
332 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
333 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
334
335 double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
336
337 double minus_inv_tau11;
338
339 //int m_behind = i;
340 int n_ahead = n_A - i - 1;
341 int m_ahead = m_A - i - 1;
342
344 n_ahead,
345 tau11,
346 a21, rs_A,
347 a12t, cs_A,
348 A22, rs_A, cs_A );
349
351
353
355 m_ahead,
357 a21, rs_A );
358
359 // Not necessary if upper triangle of A is initialized to identity.
360 //bl1_dsetv( m_behind,
361 // &zero,
362 // a01, rs_A );
363 }
364
365 return FLA_SUCCESS;
366}
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:195
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
double bl1_d1(void)
Definition bl1_constants.c:54

References bl1_d1(), bl1_dscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), and i.

Referenced by FLA_QR_UT_form_Q_opt_var1().

◆ FLA_QR_UT_form_Q_ops_var1()

FLA_Error FLA_QR_UT_form_Q_ops_var1 ( int  m_A,
int  n_AT,
float *buff_A,
int  rs_A,
int  cs_A,
float *buff_T,
int  rs_T,
int  cs_T 
)
272{
273 float one = bl1_d1();
274 int min_m_n = min( m_A, n_A );
275 int i;
276
277 for ( i = min_m_n - 1; i >= 0; --i )
278 {
279 //float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
280 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
281 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
282 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
283 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
284
285 float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
286
287 float minus_inv_tau11;
288
289 //int m_behind = i;
290 int n_ahead = n_A - i - 1;
291 int m_ahead = m_A - i - 1;
292
294 n_ahead,
295 tau11,
296 a21, rs_A,
297 a12t, cs_A,
298 A22, rs_A, cs_A );
299
301
303
305 m_ahead,
307 a21, rs_A );
308
309 // Not necessary if upper triangle of A is initialized to identity.
310 //bl1_ssetv( m_behind,
311 // &zero,
312 // a01, rs_A );
313 }
314
315 return FLA_SUCCESS;
316}
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:121
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_d1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), and i.

Referenced by FLA_QR_UT_form_Q_opt_var1().

◆ FLA_QR_UT_form_Q_opt_var1()

FLA_Error FLA_QR_UT_form_Q_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
194{
195 FLA_Datatype datatype;
196 int m_A, n_A;
197 int rs_A, cs_A;
198 int rs_T, cs_T;
199
200 datatype = FLA_Obj_datatype( A );
201
202 m_A = FLA_Obj_length( A );
203 n_A = FLA_Obj_width( A );
206
209
210 switch ( datatype )
211 {
212 case FLA_FLOAT:
213 {
214 float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
215 float* buff_T = ( float* ) FLA_FLOAT_PTR( T );
216
218 n_A,
219 buff_A, rs_A, cs_A,
220 buff_T, rs_T, cs_T );
221
222 break;
223 }
224
225 case FLA_DOUBLE:
226 {
227 double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
228 double* buff_T = ( double* ) FLA_DOUBLE_PTR( T );
229
231 n_A,
232 buff_A, rs_A, cs_A,
233 buff_T, rs_T, cs_T );
234
235 break;
236 }
237
238 case FLA_COMPLEX:
239 {
242
244 n_A,
245 buff_A, rs_A, cs_A,
246 buff_T, rs_T, cs_T );
247
248 break;
249 }
250
252 {
255
257 n_A,
258 buff_A, rs_A, cs_A,
259 buff_T, rs_T, cs_T );
260
261 break;
262 }
263 }
264
265 return FLA_SUCCESS;
266}
FLA_Error FLA_QR_UT_form_Q_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition FLA_QR_UT_form_Q.c:268
FLA_Error FLA_QR_UT_form_Q_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition FLA_QR_UT_form_Q.c:318
FLA_Error FLA_QR_UT_form_Q_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition FLA_QR_UT_form_Q.c:369
FLA_Error FLA_QR_UT_form_Q_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition FLA_QR_UT_form_Q.c:422
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
Definition blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_QR_UT_form_Q_opc_var1(), FLA_QR_UT_form_Q_opd_var1(), FLA_QR_UT_form_Q_ops_var1(), FLA_QR_UT_form_Q_opz_var1(), and i.

Referenced by FLA_QR_UT_form_Q_blk_var1().

◆ FLA_QR_UT_form_Q_opz_var1()

FLA_Error FLA_QR_UT_form_Q_opz_var1 ( int  m_A,
int  n_AT,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
dcomplex *buff_T,
int  rs_T,
int  cs_T 
)
426{
427 dcomplex zero = bl1_z0();
428 dcomplex one = bl1_z1();
429 int min_m_n = min( m_A, n_A );
430 int i;
431
432 for ( i = min_m_n - 1; i >= 0; --i )
433 {
434 //dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
435 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
436 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
437 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
438 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
439
440 dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
441
443
444 //int m_behind = i;
445 int n_ahead = n_A - i - 1;
446 int m_ahead = m_A - i - 1;
447
449 n_ahead,
450 tau11,
451 a21, rs_A,
452 a12t, cs_A,
453 A22, rs_A, cs_A );
454
455 minus_inv_tau11.real = -one.real / tau11->real;
456 minus_inv_tau11.imag = zero.imag;
457
458 alpha11->real = one.real + minus_inv_tau11.real;
459 alpha11->imag = zero.imag;
460
462 m_ahead,
464 a21, rs_A );
465
466 // Not necessary if upper triangle of A is initialized to identity.
467 //bl1_zsetv( m_behind,
468 // &zero,
469 // a01, rs_A );
470 }
471
472 return FLA_SUCCESS;
473}
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition FLA_Apply_H2_UT_l_opt_var1.c:343
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:72
dcomplex bl1_z0(void)
Definition bl1_constants.c:133
dcomplex bl1_z1(void)
Definition bl1_constants.c:69

References bl1_z0(), bl1_z1(), bl1_zscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), and i.

Referenced by FLA_QR_UT_form_Q_opt_var1().

◆ FLA_QR_UT_internal()

FLA_Error FLA_QR_UT_internal ( FLA_Obj  A,
FLA_Obj  T,
fla_qrut_t *cntl 
)
18{
20
23
24 if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
26 {
28 {
29 // Enqueue
31 }
32 else
33 {
34 // Execute
36 }
37 }
38 else
39 {
41 {
43 }
44 else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
45 {
47 }
48 else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
49 {
50 r_val = FLA_QR_UT_blk_var1( A, T, cntl );
51 }
52 else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
53 {
55 }
56 else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
57 {
59 }
60 else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
61 {
62 r_val = FLA_QR_UT_blk_var2( A, T, cntl );
63 }
64 else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
65 {
66 r_val = FLA_QR_UT_blk_var3( A, T, cntl );
67 }
68 else
69 {
71 }
72 }
73
74 return r_val;
75}
FLA_Error FLA_QR_UT_blk_var1(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition FLA_QR_UT_blk_var1.c:13
FLA_Error FLA_QR_UT_blk_var2(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition FLA_QR_UT_blk_var2.c:13
FLA_Error FLA_QR_UT_blk_var3(FLA_Obj A, FLA_Obj TW, fla_qrut_t *cntl)
Definition FLA_QR_UT_blk_var3.c:13
FLA_Error FLA_QR_UT_internal_check(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition FLA_QR_UT_internal_check.c:13
FLA_Error FLA_QR_UT_opt_var1(FLA_Obj A, FLA_Obj t)
Definition FLA_QR_UT_opt_var1.c:13
FLA_Error FLA_QR_UT_opt_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_QR_UT_opt_var2.c:13
FLA_Error FLA_QR_UT_unb_var1(FLA_Obj A, FLA_Obj t)
Definition FLA_QR_UT_unb_var1.c:13
FLA_Error FLA_QR_UT_unb_var2(FLA_Obj A, FLA_Obj T)
Definition FLA_QR_UT_unb_var2.c:13
FLA_Error FLA_QR_UT_macro_task(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition FLA_QR_UT_macro_task.c:15

References FLA_Check_error_level(), FLA_QR_UT_blk_var1(), FLA_QR_UT_blk_var2(), FLA_QR_UT_blk_var3(), FLA_QR_UT_internal_check(), FLA_QR_UT_macro_task(), FLA_QR_UT_opt_var1(), FLA_QR_UT_opt_var2(), FLA_QR_UT_unb_var1(), FLA_QR_UT_unb_var2(), FLASH_Queue_get_enabled(), and i.

Referenced by FLA_QR_UT(), FLA_QR_UT_blk_var1(), FLA_QR_UT_blk_var2(), FLA_QR_UT_blk_var3(), FLA_QR_UT_copy_task(), FLA_QR_UT_inc_blk_var1(), FLA_QR_UT_macro_task(), FLA_QR_UT_task(), and FLASH_QR_UT().

◆ FLA_QR_UT_recover_tau()

FLA_Error FLA_QR_UT_recover_tau ( FLA_Obj  T,
FLA_Obj  tau 
)
16{
17 FLA_Obj TL, TR, T0, T1, T2;
18
19 FLA_Obj tT, t0,
20 tB, t1,
21 t2;
22
23 dim_t b_alg, b;
24
27
29
30 FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
31
32 FLA_Part_2x1( t, &tT,
33 &tB, 0, FLA_TOP );
34
35 // T matrix may include workspace; thus, T should not be placed as a loop guard.
36 while ( FLA_Obj_length( tB ) > 0 ) {
37
38 // The blocksize should be determined so that the T matrix is square.
39 b = min( FLA_Obj_length( tB ), b_alg );
40
41 FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
42 b, FLA_RIGHT );
43
45 /* ** */ /* ** */
46 &t1,
47 tB, &t2, b, FLA_BOTTOM );
48
49 /*------------------------------------------------------------*/
50
52
53 /*------------------------------------------------------------*/
54
55 FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
56 FLA_LEFT );
57
59 t1,
60 /* ** */ /* ** */
61 &tB, t2, FLA_TOP );
62 }
63
64 return FLA_SUCCESS;
65}
FLA_Error FLA_QR_UT_recover_tau_submatrix(FLA_Obj T, FLA_Obj t)
Definition FLA_QR_UT_recover_tau.c:68
FLA_Error FLA_QR_UT_recover_tau_check(FLA_Obj T, FLA_Obj tau)
Definition FLA_QR_UT_recover_tau_check.c:13
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226

References FLA_Check_error_level(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_QR_UT_recover_tau_check(), FLA_QR_UT_recover_tau_submatrix(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), and i.

Referenced by FLA_LQ_UT_recover_tau().

◆ FLA_QR_UT_solve()

FLA_Error FLA_QR_UT_solve ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  B,
FLA_Obj  X 
)
14{
15 FLA_Obj W, Y;
16 FLA_Obj AT, AB;
17 FLA_Obj YT, YB;
18
19 // Check parameters.
22
24
26
28 A, T, W, Y );
29
30 FLA_Part_2x1( A, &AT,
31 &AB, FLA_Obj_width( A ), FLA_TOP );
32 FLA_Part_2x1( Y, &YT,
33 &YB, FLA_Obj_width( A ), FLA_TOP );
34
37
39
40 FLA_Obj_free( &Y );
41 FLA_Obj_free( &W );
42
43 return FLA_SUCCESS;
44}
FLA_Error FLA_Apply_Q_UT_create_workspace(FLA_Obj T, FLA_Obj B, FLA_Obj *W)
Definition FLA_Apply_Q_UT_create_workspace.c:13
FLA_Error FLA_QR_UT_solve_check(FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X)
Definition FLA_QR_UT_solve_check.c:13
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy_external.c:13
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Trsm_external.c:13
FLA_Error FLA_Obj_create_copy_of(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition FLA_Obj.c:345

References FLA_Apply_Q_UT(), FLA_Apply_Q_UT_create_workspace(), FLA_Check_error_level(), FLA_Copy_external(), FLA_Obj_create_copy_of(), FLA_Obj_free(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_QR_UT_solve_check(), FLA_Trsm_external(), and i.

◆ FLASH_QR_UT()

FLA_Error FLASH_QR_UT ( FLA_Obj  A,
FLA_Obj  TW 
)
17{
20
21 // Check parameters.
24
25 // *** The current hierarchical QR_UT algorithm assumes that the matrix
26 // has a hierarchical depth of 1. We check for that here, because we
27 // anticipate that we'll use a more general algorithm in the future, and
28 // we don't want to forget to remove the constraint. ***
29 if ( FLASH_Obj_depth( A ) != 1 )
30 {
31 FLA_Print_message( "FLASH_QR_UT() currently only supports matrices of depth 1",
33 FLA_Abort();
34 }
35
36 // Inspect the length of TTL to get the blocksize used by the QR
37 // factorization, which will be our inner blocksize for Apply_Q_UT.
40
41 // The traditional (non-incremental) QR_UT algorithm-by-blocks requires
42 // that the algorithmic blocksize be equal to the storage blocksize.
43 if ( b_alg != b_flash )
44 {
45 FLA_Print_message( "FLASH_QR_UT() requires that b_alg == b_store",
47 FLA_Abort();
48 }
49
50 // The traditional (non-incremental) QR_UT algorithm-by-blocks requires
51 // that min_dim(A) % b_flash == 0.
52 if ( FLASH_Obj_scalar_min_dim( A ) % b_flash != 0 )
53 {
54 FLA_Print_message( "FLASH_QR_UT() requires that min_dim( A ) %% b_store == 0",
56 FLA_Abort();
57 }
58
59 // Begin a parallel region.
61
62 // Invoke FLA_QR_UT_internal() with hierarchical control tree.
64
65 // End the parallel region.
67
68 return r_val;
69}
fla_qrut_t * flash_qrut_cntl
Definition FLASH_QR_UT_cntl_init.c:16
void FLASH_Queue_begin(void)
Definition FLASH_Queue.c:59
void FLASH_Queue_end(void)
Definition FLASH_Queue.c:81
dim_t FLASH_Obj_scalar_length_tl(FLA_Obj H)
Definition FLASH_View.c:723
dim_t FLASH_Obj_depth(FLA_Obj H)
Definition FLASH_Obj.c:20
dim_t FLASH_Obj_scalar_min_dim(FLA_Obj H)
Definition FLASH_View.c:675
dim_t FLASH_Obj_scalar_width_tl(FLA_Obj H)
Definition FLASH_View.c:737
void FLA_Abort(void)
Definition FLA_Error.c:248
void FLA_Print_message(char *str, char *file, int line)
Definition FLA_Error.c:234

◆ FLASH_QR_UT_create_hier_matrices()

FLA_Error FLASH_QR_UT_create_hier_matrices ( FLA_Obj  A_flat,
dim_t  depth,
dim_t *b_flash,
FLA_Obj *A,
FLA_Obj *TW 
)
14{
15 FLA_Datatype datatype;
16 dim_t m, n;
18
19 // *** The current QR_UT algorithm implemented assumes that
20 // the matrix has a hierarchical depth of 1. We check for that here
21 // because we anticipate that we'll use a more general algorithm in the
22 // future, and we don't want to forget to remove the constraint. ***
23 if ( depth != 1 )
24 {
25 FLA_Print_message( "FLASH_QR_UT() currently only supports matrices of depth 1",
27 FLA_Abort();
28 }
29
30 // Create hierarchical copy of matrix A_flat.
32
33 // Query the datatype of matrix A_flat.
34 datatype = FLA_Obj_datatype( A_flat );
35
36 // Query the minimum dimension of A_flat.
38
39 // Set the m and n dimensions of TW to be min_m_n.
40 m = min_m_n;
41 n = min_m_n;
42
43 // Create hierarchical matrices T and W.
44 FLASH_Obj_create_ext( datatype, m, n,
46 TW );
47
48 return FLA_SUCCESS;
49}
FLA_Error FLASH_Obj_create_ext(FLA_Datatype datatype, dim_t m, dim_t n, dim_t depth, dim_t *b_m, dim_t *b_n, FLA_Obj *H)
Definition FLASH_Obj.c:151
FLA_Error FLASH_Obj_create_hier_copy_of_flat(FLA_Obj F, dim_t depth, dim_t *b_mn, FLA_Obj *H)
Definition FLASH_Obj.c:591

References FLA_Abort(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Print_message(), FLASH_Obj_create_ext(), FLASH_Obj_create_hier_copy_of_flat(), and i.

◆ FLASH_QR_UT_solve()

FLA_Error FLASH_QR_UT_solve ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  B,
FLA_Obj  X 
)
14{
15 FLA_Obj W, Y;
16 FLA_Obj AT, AB;
17 FLA_Obj YT, YB;
18
19 // Check parameters.
22
24
26
28 A, TW, W, Y );
29
30 FLA_Part_2x1( A, &AT,
31 &AB, FLA_Obj_width( A ), FLA_TOP );
32 FLA_Part_2x1( Y, &YT,
33 &YB, FLA_Obj_width( A ), FLA_TOP );
34
36 FLA_ONE, AT, YT );
37
38 FLASH_Copy( YT, X );
39
40 FLASH_Obj_free( &Y );
41 FLASH_Obj_free( &W );
42
43 return FLA_SUCCESS;
44}
FLA_Error FLASH_Copy(FLA_Obj A, FLA_Obj B)
Definition FLASH_Copy.c:15
FLA_Error FLASH_Trsm(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLASH_Trsm.c:15
FLA_Error FLASH_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition FLASH_Apply_Q_UT.c:16
void FLASH_Obj_free(FLA_Obj *H)
Definition FLASH_Obj.c:638
FLA_Error FLASH_Obj_create_copy_of(FLA_Trans trans, FLA_Obj H_cur, FLA_Obj *H_new)
Definition FLASH_Obj.c:561
FLA_Error FLASH_Apply_Q_UT_create_workspace(FLA_Obj TW, FLA_Obj B, FLA_Obj *W)
Definition FLASH_Apply_Q_UT_create_workspace.c:13