libflame revision_anchor
Functions
FLA_CAQR2_UT_blk_var1.c File Reference

(r)

Functions

FLA_Error FLA_CAQR2_UT_blk_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl)
 

Function Documentation

◆ FLA_CAQR2_UT_blk_var1()

/*
 * FLA_CAQR2_UT_blk_var1 -- blocked variant 1 of the CAQR2 "UT" operation.
 *
 * Walks U and D diagonally (and T left to right) in steps of at most b_alg,
 * where b_alg is the length (row count) of T.  In each iteration it:
 *   1. merges D01 over D11 into the view D1 and calls
 *      FLA_CAQR2_UT_internal() on ( U11, D1, T1T ), where T1T is the top
 *      b rows of the current column panel of T;
 *   2. if any columns remain to the right (U12 is non-empty), forms
 *        W12T = inv( triu( T1T ) )' * ( U12 + D01' * D02 + D11' * D12 )
 *      in the top b rows of the unprocessed part of T, then applies
 *        U12 -= W12T,  D02 -= D01 * W12T,  D12 -= D11 * W12T.
 *
 * Parameters:
 *   U, D  matrix views that are updated in place.
 *   T     matrix view; its length fixes the algorithmic block size, and its
 *         not-yet-processed columns double as the W12 workspace.
 *   cntl  control tree node; its sub-nodes are forwarded to the internal
 *         CAQR2_UT, copy, axpy, gemm, trmm and trsm calls.
 *
 * Returns FLA_SUCCESS.  Return values of the called routines are not
 * inspected.
 *
 * NOTE(review): the D11 operand is treated as upper triangular by the Trmm
 * calls -- confirm against the caller's contract for D.
 */
FLA_Error FLA_CAQR2_UT_blk_var1( FLA_Obj U,
                                 FLA_Obj D,
                                 FLA_Obj T,
                                 fla_caqr2ut_t *cntl )
{
  FLA_Obj UTL,   UTR,      U00, U01, U02,
          UBL,   UBR,      U10, U11, U12,
                           U20, U21, U22;

  FLA_Obj DTL,   DTR,      D00, D01, D02,
          DBL,   DBR,      D10, D11, D12,
                           D20, D21, D22;

  FLA_Obj TL,    TR,       T0,  T1,  W12;

  FLA_Obj D1;

  // Top/bottom splits of W12 and T1; only the top parts are used, the
  // bottom parts exist because FLA_Part_2x1() needs both outputs.
  FLA_Obj W12T, W12B;

  FLA_Obj T1T, T2B;

  dim_t   b_alg, b;
  dim_t   m_DT;

  // Query the algorithmic blocksize by inspecting the length of T.
  b_alg = FLA_Obj_length( T );

  // Begin partitioning diagonally through D with m - n rows above
  // the diagonal.
  m_DT = FLA_Obj_length( D ) - FLA_Obj_width( D );

  FLA_Part_2x2( U,    &UTL, &UTR,
                      &UBL, &UBR,     0, 0, FLA_TL );

  FLA_Part_2x2( D,    &DTL, &DTR,
                      &DBL, &DBR,     m_DT, 0, FLA_TL );

  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );

  while ( FLA_Obj_min_dim( UBR ) > 0 ){

    // The final block may be narrower than the algorithmic blocksize.
    b = min( b_alg, FLA_Obj_min_dim( UBR ) );

    FLA_Repart_2x2_to_3x3( UTL, /**/ UTR,       &U00, /**/ &U01, &U02,
                        /* ************* */   /* ******************** */
                                                &U10, /**/ &U11, &U12,
                           UBL, /**/ UBR,       &U20, /**/ &U21, &U22,
                           b, b, FLA_BR );

    FLA_Repart_2x2_to_3x3( DTL, /**/ DTR,       &D00, /**/ &D01, &D02,
                        /* ************* */   /* ******************** */
                                                &D10, /**/ &D11, &D12,
                           DBL, /**/ DBR,       &D20, /**/ &D21, &D22,
                           b, b, FLA_BR );

    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &W12,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    // T1T = FLA_Top_part( T1, b );

    FLA_Part_2x1( T1,   &T1T,
                        &T2B,    b, FLA_TOP );

    // D1 = [ D01; D11 ]: the current column panel of D down to and
    // including the diagonal block.
    FLA_Merge_2x1( D01,
                   D11,   &D1 );

    // [ U11, ...
    //   D1, T1 ] = FLA_CAQR2_UT( U11
    //                            D1, T1T );

    FLA_CAQR2_UT_internal( U11,
                           D1, T1T,
                           FLA_Cntl_sub_caqr2ut( cntl ) );


    if ( FLA_Obj_width( U12 ) > 0 )
    {
      // W12T = FLA_Top_part( W12, b );

      FLA_Part_2x1( W12,  &W12T,
                          &W12B,    b, FLA_TOP );

      // W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 );
      //      = inv( triu( T1T ) )' * ( U12 + D01' * D02 + D11' * D12 );

      // W12T = D12;
      FLA_Copy_internal( D12, W12T,
                         FLA_Cntl_sub_copy( cntl ) );

      // W12T = D11' * W12T;
      FLA_Trmm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                         FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
                         FLA_ONE, D11, W12T,
                         FLA_Cntl_sub_trmm1( cntl ) );

      // W12T = W12T + D01' * D02;
      FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
                         FLA_ONE, D01, D02, FLA_ONE, W12T,
                         FLA_Cntl_sub_gemm1( cntl ) );

      // W12T = W12T + U12;
      FLA_Axpy_internal( FLA_ONE, U12, W12T,
                         FLA_Cntl_sub_axpy1( cntl ) );

      // W12T = inv( triu( T1T ) )' * W12T;
      FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                         FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
                         FLA_ONE, T1T, W12T,
                         FLA_Cntl_sub_trsm( cntl ) );

      // U12 = U12 - W12T;
      // D2  = D2  - D1  * W12T;
      //   => D02 = D02 - D01 * W12T;
      //      D12 = D12 - D11 * W12T;

      FLA_Axpy_internal( FLA_MINUS_ONE, W12T, U12,
                         FLA_Cntl_sub_axpy2( cntl ) );

      FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                         FLA_MINUS_ONE, D01, W12T, FLA_ONE, D02,
                         FLA_Cntl_sub_gemm2( cntl ) );

      // W12T is overwritten with D11 * W12T here; it is not needed in its
      // previous form after this point.
      FLA_Trmm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                         FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                         FLA_ONE, D11, W12T,
                         FLA_Cntl_sub_trmm2( cntl ) );

      FLA_Axpy_internal( FLA_MINUS_ONE, W12T, D12,
                         FLA_Cntl_sub_axpy3( cntl ) );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR,       U00, U01, /**/ U02,
                                                     U10, U11, /**/ U12,
                            /* ************** */  /* ****************** */
                              &UBL, /**/ &UBR,       U20, U21, /**/ U22,
                              FLA_TL );

    FLA_Cont_with_3x3_to_2x2( &DTL, /**/ &DTR,       D00, D01, /**/ D02,
                                                     D10, D11, /**/ D12,
                            /* ************** */  /* ****************** */
                              &DBL, /**/ &DBR,       D20, D21, /**/ D22,
                              FLA_TL );

    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ W12,
                              FLA_LEFT );

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Axpy_internal(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t *cntl)
Definition FLA_Axpy_internal.c:16
FLA_Error FLA_CAQR2_UT_internal(FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl)
Definition FLA_CAQR2_UT_internal.c:16
FLA_Error FLA_Copy_internal(FLA_Obj A, FLA_Obj B, fla_copy_t *cntl)
Definition FLA_Copy_internal.c:16
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition FLA_Gemm_internal.c:16
FLA_Error FLA_Trmm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t *cntl)
Definition FLA_Trmm_internal.c:16
FLA_Error FLA_Trsm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition FLA_Trsm_internal.c:16
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition FLA_View.c:541
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition FLA_Query.c:153
unsigned long dim_t
Definition FLA_type_defs.h:71
int i
Definition bl1_axmyv2.c:145
FLA_Obj_view
Definition FLA_type_defs.h:159

References FLA_Axpy_internal(), FLA_CAQR2_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_internal(), FLA_Gemm_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Trmm_internal(), FLA_Trsm_internal(), and i.

Referenced by FLA_CAQR2_UT_internal().