libflame revision_anchor
Functions
FLA_LU_incpiv_aux.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_SA_Apply_pivots (FLA_Obj C, FLA_Obj E, FLA_Obj p)
 
FLA_Error FLA_SA_LU_blk (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, dim_t nb_alg)
 
FLA_Error FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L)
 
FLA_Error FLA_SA_FS_blk (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
 
FLA_Error FLASH_LU_incpiv_var1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
 
FLA_Error FLASH_LU_incpiv_var2 (FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj U, dim_t nb_alg, fla_lu_t *cntl)
 
FLA_Error FLASH_Trsm_piv (FLA_Obj A, FLA_Obj B, FLA_Obj p, fla_trsm_t *cntl)
 
FLA_Error FLASH_SA_LU (FLA_Obj B, FLA_Obj C, FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
 
FLA_Error FLASH_SA_FS (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg, fla_gemm_t *cntl)
 
FLA_Error FLASH_FS_incpiv_aux1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj b, dim_t nb_alg)
 
FLA_Error FLASH_FS_incpiv_aux2 (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
 

Function Documentation

◆ FLA_SA_Apply_pivots()

FLA_Error FLA_SA_Apply_pivots ( FLA_Obj  C,
FLA_Obj  E,
FLA_Obj  p 
)
14{
15 FLA_Datatype datatype;
16 int m_C, n_C, cs_C;
17 int cs_E;
18 // int rs_C;
19 // int rs_E;
20 int m_p;
21 int i;
22 int* buff_p;
23
24 if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
25
26 datatype = FLA_Obj_datatype( C );
27
28 m_C = FLA_Obj_length( C );
29 n_C = FLA_Obj_width( C );
30 cs_C = FLA_Obj_col_stride( C );
31 // rs_C = FLA_Obj_row_stride( C );
32
33 cs_E = FLA_Obj_col_stride( E );
34 // rs_E = FLA_Obj_row_stride( E );
35
36 m_p = FLA_Obj_length( p );
37
38 buff_p = ( int * ) FLA_INT_PTR( p );
39
40
41 switch ( datatype ){
42
43 case FLA_FLOAT:
44 {
45 float* buff_C = ( float * ) FLA_FLOAT_PTR( C );
46 float* buff_E = ( float * ) FLA_FLOAT_PTR( E );
47
48 for ( i = 0; i < m_p; ++i )
49 {
50 if ( buff_p[ i ] != 0 )
51 bl1_sswap( n_C,
52 buff_C + 0*cs_C + i, cs_C,
53 buff_E + 0*cs_E + buff_p[ i ] - ( m_C - i ), cs_E );
54 }
55 break;
56 }
57
58 case FLA_DOUBLE:
59 {
60 double* buff_C = ( double * ) FLA_DOUBLE_PTR( C );
61 double* buff_E = ( double * ) FLA_DOUBLE_PTR( E );
62
63 for ( i = 0; i < m_p; ++i )
64 {
65 if ( buff_p[ i ] != 0 )
66 bl1_dswap( n_C,
67 buff_C + 0*cs_C + i, cs_C,
68 buff_E + 0*cs_E + buff_p[ i ] - ( m_C - i ), cs_E );
69 }
70 break;
71 }
72
73 case FLA_COMPLEX:
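74 {
75 scomplex* buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C );
76 scomplex* buff_E = ( scomplex * ) FLA_COMPLEX_PTR( E );
77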
78 for ( i = 0; i < m_p; ++i )
79 {
80 if ( buff_p[ i ] != 0 )
81 bl1_cswap( n_C,
82 buff_C + 0*cs_C + i, cs_C,
83 buff_E + 0*cs_E + buff_p[ i ] - ( m_C - i ), cs_E );
84 }
85 break;
86 }
87
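88 case FLA_DOUBLE_COMPLEX:
89 {
90 dcomplex* buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
91 dcomplex* buff_E = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( E );
92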
93 for ( i = 0; i < m_p; ++i )
94 {
95 if ( buff_p[ i ] != 0 )
96 bl1_zswap( n_C,
97 buff_C + 0*cs_C + i, cs_C,
98 buff_E + 0*cs_E + buff_p[ i ] - ( m_C - i ), cs_E );
99 }
100 break;
101 }
102
103 }
104
105 return FLA_SUCCESS;
106}
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Bool FLA_Obj_has_zero_dim(FLA_Obj A)
Definition FLA_Query.c:400
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
int i
Definition bl1_axmyv2.c:145
void bl1_zswap(int n, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_swap.c:52
void bl1_dswap(int n, double *x, int incx, double *y, int incy)
Definition bl1_swap.c:26
void bl1_cswap(int n, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_swap.c:39
void bl1_sswap(int n, float *x, int incx, float *y, int incy)
Definition bl1_swap.c:13
dcomplex
Definition blis_type_defs.h:138
scomplex
Definition blis_type_defs.h:133

References bl1_cswap(), bl1_dswap(), bl1_sswap(), bl1_zswap(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), and i.

Referenced by FLA_SA_FS_blk(), and FLA_SA_LU_blk().

◆ FLA_SA_FS_blk()

FLA_Error FLA_SA_FS_blk ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg 
)
16{
17 FLA_Obj LT, L0,
18 LB, L1,
19 L2;
20
21 FLA_Obj DL, DR, D0, D1, D2;
22
23 FLA_Obj pT, p0,
24 pB, p1,
25 p2;
26
27 FLA_Obj CT, C0,
28 CB, C1,
29 C2;
30
32
33 dim_t b;
34
35 FLA_Part_2x1( L, &LT,
36 &LB, 0, FLA_TOP );
37
38 FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
39
40 FLA_Part_2x1( p, &pT,
41 &pB, 0, FLA_TOP );
42
43 FLA_Part_2x1( C, &CT,
44 &CB, 0, FLA_TOP );
45
46 while ( FLA_Obj_length( LT ) < FLA_Obj_length( L ) )
47 {
48 b = min( FLA_Obj_length( LB ), nb_alg );
49
51 /* ** */ /* ** */
52 &L1,
53 LB, &L2, b, FLA_BOTTOM );
54
55 FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2,
56 b, FLA_RIGHT );
57
59 /* ** */ /* ** */
60 &p1,
61 pB, &p2, b, FLA_BOTTOM );
62
64 /* ** */ /* ** */
65 &C1,
66 CB, &C2, b, FLA_BOTTOM );
67
68 /*------------------------------------------------------------*/
69
71
72
74 E, p1 );
75
78 FLA_ONE, L1_sqr, C1 );
79
82
83 /*------------------------------------------------------------*/
84
86 L1,
87 /* ** */ /* ** */
88 &LB, L2, FLA_TOP );
89
90 FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2,
91 FLA_LEFT );
92
94 p1,
95 /* ** */ /* ** */
96 &pB, p2, FLA_TOP );
97
99 C1,
100 /* ** */ /* ** */
101 &CB, C2, FLA_TOP );
102 }
103
104 return FLA_SUCCESS;
105}
FLA_Error FLA_SA_Apply_pivots(FLA_Obj C, FLA_Obj E, FLA_Obj p)
Definition FLA_SA_Apply_pivots.c:13
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition FLA_Gemm_external.c:13
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Trsm_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
unsigned long dim_t
Definition FLA_type_defs.h:71
Definition FLA_type_defs.h:159

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_SA_Apply_pivots(), FLA_Trsm_external(), and i.

Referenced by FLA_SA_FS_task(), and FLASH_FS_incpiv_aux2().

◆ FLA_SA_LU_blk()

FLA_Error FLA_SA_LU_blk ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg 
)
15{
16 FLA_Obj UTL, UTR, U00, U01, U02,
17 UBL, UBR, U10, U11, U12,
18 U20, U21, U22;
19
20 FLA_Obj DL, DR, D0, D1, D2;
21
22 FLA_Obj pT, p0,
23 pB, p1,
24 p2;
25
26 FLA_Obj LT, L0,
27 LB, L1,
28 L2;
29
31
32 dim_t b;
33
34 FLA_Part_2x2( U, &UTL, &UTR,
35 &UBL, &UBR, 0, 0, FLA_TL );
36
37 FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
38
39 FLA_Part_2x1( p, &pT,
40 &pB, 0, FLA_TOP );
41
42 FLA_Part_2x1( L, &LT,
43 &LB, 0, FLA_TOP );
44
45 while ( FLA_Obj_length( UTL ) < FLA_Obj_length( U ) )
46 {
47 b = min( FLA_Obj_length( UBR ), nb_alg );
48
49 FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &U01, &U02,
50 /* ************* */ /* ******************** */
51 &U10, /**/ &U11, &U12,
52 UBL, /**/ UBR, &U20, /**/ &U21, &U22,
53 b, b, FLA_BR );
54
55 FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2,
56 b, FLA_RIGHT );
57
59 /* ** */ /* ** */
60 &p1,
61 pB, &p2, b, FLA_BOTTOM );
62
64 /* ** */ /* ** */
65 &L1,
66 LB, &L2, b, FLA_BOTTOM );
67
68 /*------------------------------------------------------------*/
69
71
72
74 D1, p1, L1_sqr );
75
77 D2, p1 );
78
81 FLA_ONE, L1_sqr, U12 );
82
85
86 /*------------------------------------------------------------*/
87
88 FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, U01, /**/ U02,
89 U10, U11, /**/ U12,
90 /* ************** */ /* ****************** */
91 &UBL, /**/ &UBR, U20, U21, /**/ U22,
92 FLA_TL );
93
94 FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2,
95 FLA_LEFT );
96
98 p1,
99 /* ** */ /* ** */
100 &pB, p2, FLA_TOP );
101
103 L1,
104 /* ** */ /* ** */
105 &LB, L2, FLA_TOP );
106 }
107
108 return FLA_SUCCESS;
109}
FLA_Error FLA_SA_LU_unb(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L)
Definition FLA_SA_LU_unb.c:13
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_SA_Apply_pivots(), FLA_SA_LU_unb(), FLA_Trsm_external(), and i.

Referenced by FLA_SA_LU_task().

◆ FLA_SA_LU_unb()

FLA_Error FLA_SA_LU_unb ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L 
)
14{
15 FLA_Datatype datatype;
16 int m_U, cs_U;
17 int m_D, cs_D;
18 int cs_L;
19 // int rs_U;
20 int rs_D;
21 // int rs_L;
23 int j, ipiv;
24 int* buff_p;
25
26 if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS;
27
28 datatype = FLA_Obj_datatype( U );
29
30 m_U = FLA_Obj_length( U );
31 // rs_U = FLA_Obj_row_stride( U );
33
34 m_D = FLA_Obj_length( D );
37
38 // rs_L = FLA_Obj_row_stride( L );
40
43
44 buff_p = ( int * ) FLA_INT_PTR( p );
45
46 switch ( datatype ){
47
48 case FLA_FLOAT:
49 {
50 float* buff_U = ( float * ) FLA_FLOAT_PTR( U );
51 float* buff_D = ( float * ) FLA_FLOAT_PTR( D );
52 float* buff_L = ( float * ) FLA_FLOAT_PTR( L );
53 float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE );
54 float L_tmp;
55 float D_tmp;
56 float d_inv_Ljj;
57
58 for ( j = 0; j < m_U; ++j )
59 {
60 bl1_samax( m_D,
61 buff_D + j*cs_D + 0*rs_D,
62 rs_D,
63 &ipiv );
64
65 L_tmp = buff_L[ j*cs_L + j ];
66 D_tmp = buff_D[ j*cs_D + ipiv ];
67
68 if ( fabsf( L_tmp ) < fabsf( D_tmp ) )
69 {
71 buff_L + 0*cs_L + j, cs_L,
72 buff_D + 0*cs_D + ipiv, cs_D );
73
74 buff_p[ j ] = ipiv + m_U - j;
75 }
76 else
77 {
78 buff_p[ j ] = 0;
79 }
80
81 d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ];
82
84 &d_inv_Ljj,
85 buff_D + j*cs_D + 0, rs_D );
86
87 m_U_min_j_min_1 = m_U - j - 1;
88
89 if ( m_U_min_j_min_1 > 0 )
90 {
93 m_D,
96 buff_D + (j+0)*cs_D + 0, rs_D,
97 buff_L + (j+1)*cs_L + j, cs_L,
98 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
99 }
100
101 m_U_min_j = m_U - j;
102
103 if ( m_U_min_j > 0 )
104 {
106 buff_L + j*cs_L + j, cs_L,
107 buff_U + j*cs_U + j, cs_U );
108 }
109 }
110 break;
111 }
112
113 case FLA_DOUBLE:
114 {
115 double* buff_U = ( double * ) FLA_DOUBLE_PTR( U );
116 double* buff_D = ( double * ) FLA_DOUBLE_PTR( D );
117 double* buff_L = ( double * ) FLA_DOUBLE_PTR( L );
118 double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE );
119 double L_tmp;
120 double D_tmp;
121 double d_inv_Ljj;
122
123 for ( j = 0; j < m_U; ++j )
124 {
125 bl1_damax( m_D,
126 buff_D + j*cs_D + 0*rs_D,
127 rs_D,
128 &ipiv );
129
130 L_tmp = buff_L[ j*cs_L + j ];
131 D_tmp = buff_D[ j*cs_D + ipiv ];
132
133 if ( fabs( L_tmp ) < fabs( D_tmp ) )
134 {
135 bl1_dswap( m_U,
136 buff_L + 0*cs_L + j, cs_L,
137 buff_D + 0*cs_D + ipiv, cs_D );
138
139 buff_p[ j ] = ipiv + m_U - j;
140 }
141 else
142 {
143 buff_p[ j ] = 0;
144 }
145
146 d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ];
147
148 bl1_dscal( m_D,
149 &d_inv_Ljj,
150 buff_D + j*cs_D + 0, rs_D );
151
152 m_U_min_j_min_1 = m_U - j - 1;
153
154 if ( m_U_min_j_min_1 > 0 )
155 {
158 m_D,
161 buff_D + (j+0)*cs_D + 0, rs_D,
162 buff_L + (j+1)*cs_L + j, cs_L,
163 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
164 }
165
166 m_U_min_j = m_U - j;
167
168 if ( m_U_min_j > 0 )
169 {
171 buff_L + j*cs_L + j, cs_L,
172 buff_U + j*cs_U + j, cs_U );
173 }
174 }
175 break;
176 }
177
178 case FLA_COMPLEX:
179 {
188 float temp;
189
190 for ( j = 0; j < m_U; ++j )
191 {
192 bl1_camax( m_D,
193 buff_D + j*cs_D + 0*rs_D,
194 rs_D,
195 &ipiv );
196
197 L_tmp = buff_L[ j*cs_L + j ];
198 D_tmp = buff_D[ j*cs_D + ipiv ];
199
200 if ( fabsf( L_tmp.real + L_tmp.imag ) < fabsf( D_tmp.real + D_tmp.imag ) )
201 {
202 bl1_cswap( m_U,
203 buff_L + 0*cs_L + j, cs_L,
204 buff_D + 0*cs_D + ipiv, cs_D );
205
206 buff_p[ j ] = ipiv + m_U - j;
207 }
208 else
209 {
210 buff_p[ j ] = 0;
211 }
212
213 Ljj = buff_L[ j*cs_L + j ];
214
215 // d_inv_Ljj = 1.0 / Ljj
216 temp = 1.0F / ( Ljj.real * Ljj.real +
217 Ljj.imag * Ljj.imag );
218 d_inv_Ljj.real = Ljj.real * temp;
219 d_inv_Ljj.imag = Ljj.imag * -temp;
220
221 bl1_cscal( m_D,
222 &d_inv_Ljj,
223 buff_D + j*cs_D + 0, rs_D );
224
225 m_U_min_j_min_1 = m_U - j - 1;
226
227 if ( m_U_min_j_min_1 > 0 )
228 {
231 m_D,
234 buff_D + (j+0)*cs_D + 0, rs_D,
235 buff_L + (j+1)*cs_L + j, cs_L,
236 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
237 }
238
239 m_U_min_j = m_U - j;
240
241 if ( m_U_min_j > 0 )
242 {
244 buff_L + j*cs_L + j, cs_L,
245 buff_U + j*cs_U + j, cs_U );
246 }
247 }
248 break;
249 }
250
251 case FLA_DOUBLE_COMPLEX:
252 {
261 double temp;
262
263 for ( j = 0; j < m_U; ++j )
264 {
265 bl1_zamax( m_D,
266 buff_D + j*cs_D + 0*rs_D,
267 rs_D,
268 &ipiv );
269
270 L_tmp = buff_L[ j*cs_L + j ];
271 D_tmp = buff_D[ j*cs_D + ipiv ];
272
273 if ( fabs( L_tmp.real + L_tmp.imag ) < fabs( D_tmp.real + D_tmp.imag ) )
274 {
275 bl1_zswap( m_U,
276 buff_L + 0*cs_L + j, cs_L,
277 buff_D + 0*cs_D + ipiv, cs_D );
278
279 buff_p[ j ] = ipiv + m_U - j;
280 }
281 else
282 {
283 buff_p[ j ] = 0;
284 }
285
286 Ljj = buff_L[ j*cs_L + j ];
287
288 // d_inv_Ljj = 1.0 / Ljj
289 temp = 1.0 / ( Ljj.real * Ljj.real +
290 Ljj.imag * Ljj.imag );
291 d_inv_Ljj.real = Ljj.real * temp;
292 d_inv_Ljj.imag = Ljj.imag * -temp;
293
294 bl1_zscal( m_D,
295 &d_inv_Ljj,
296 buff_D + j*cs_D + 0, rs_D );
297
298 m_U_min_j_min_1 = m_U - j - 1;
299
300 if ( m_U_min_j_min_1 > 0 )
301 {
304 m_D,
307 buff_D + (j+0)*cs_D + 0, rs_D,
308 buff_L + (j+1)*cs_L + j, cs_L,
309 buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
310 }
311
312 m_U_min_j = m_U - j;
313
314 if ( m_U_min_j > 0 )
315 {
317 buff_L + j*cs_L + j, cs_L,
318 buff_U + j*cs_U + j, cs_U );
319 }
320 }
321 break;
322 }
323
324 }
325
326 return FLA_SUCCESS;
327}
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition FLA_Copy_external.c:13
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition FLA_Triangularize.c:13
void bl1_samax(int n, float *x, int incx, int *index)
Definition bl1_amax.c:13
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition bl1_amax.c:46
void bl1_damax(int n, double *x, int incx, int *index)
Definition bl1_amax.c:24
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition bl1_amax.c:35
dcomplex temp
Definition bl1_axpyv2b.c:301
void bl1_zcopy(int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition bl1_copy.c:52
void bl1_dcopy(int m, double *x, int incx, double *y, int incy)
Definition bl1_copy.c:26
void bl1_ccopy(int m, scomplex *x, int incx, scomplex *y, int incy)
Definition bl1_copy.c:39
void bl1_scopy(int m, float *x, int incx, float *y, int incy)
Definition bl1_copy.c:13
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13
void bl1_dscal(int n, double *alpha, double *x, int incx)
Definition bl1_scal.c:26
void bl1_zscal(int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_scal.c:78
void bl1_cscal(int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_scal.c:52
void bl1_sscal(int n, float *alpha, float *x, int incx)
Definition bl1_scal.c:13
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
double real
Definition blis_type_defs.h:139
double imag
Definition blis_type_defs.h:139

References bl1_camax(), bl1_ccopy(), bl1_cger(), bl1_cscal(), bl1_cswap(), bl1_damax(), bl1_dcopy(), bl1_dger(), bl1_dscal(), bl1_dswap(), bl1_samax(), bl1_scopy(), bl1_sger(), bl1_sscal(), bl1_sswap(), bl1_zamax(), bl1_zcopy(), bl1_zger(), bl1_zscal(), bl1_zswap(), BLIS1_NO_CONJUGATE, FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Triangularize(), i, dcomplex::imag, dcomplex::real, and temp.

Referenced by FLA_SA_LU_blk().

◆ FLASH_FS_incpiv_aux1()

FLA_Error FLASH_FS_incpiv_aux1 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
FLA_Obj  b,
dim_t  nb_alg 
)
14{
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18
20 pBL, pBR, p10, p11, p12,
21 p20, p21, p22;
22
24 LBL, LBR, L10, L11, L12,
25 L20, L21, L22;
26
27 FLA_Obj bT, b0,
28 bB, b1,
29 b2;
30
33
34 FLA_Part_2x2( A, &ATL, &ATR,
35 &ABL, &ABR, 0, 0, FLA_TL );
36
37 FLA_Part_2x2( p, &pTL, &pTR,
38 &pBL, &pBR, 0, 0, FLA_TL );
39
40 FLA_Part_2x2( L, &LTL, &LTR,
41 &LBL, &LBR, 0, 0, FLA_TL );
42
43 FLA_Part_2x1( b, &bT,
44 &bB, 0, FLA_TOP );
45
46 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
47 FLA_Obj_width( ATL ) < FLA_Obj_width( A ) )
48 {
49 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
50 /* ************* */ /* ******************** */
51 &A10, /**/ &A11, &A12,
52 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
53 1, 1, FLA_BR );
54
55 FLA_Repart_2x2_to_3x3( pTL, /**/ pTR, &p00, /**/ &p01, &p02,
56 /* ************* */ /* ******************** */
57 &p10, /**/ &p11, &p12,
58 pBL, /**/ pBR, &p20, /**/ &p21, &p22,
59 1, 1, FLA_BR );
60
61 FLA_Repart_2x2_to_3x3( LTL, /**/ LTR, &L00, /**/ &L01, &L02,
62 /* ************* */ /* ******************** */
63 &L10, /**/ &L11, &L12,
64 LBL, /**/ LBR, &L20, /**/ &L21, &L22,
65 1, 1, FLA_BR );
66
68 /* ** */ /* ** */
69 &b1,
70 bB, &b2, 1, FLA_BOTTOM );
71
72 /*------------------------------------------------------------*/
73
75 &p11_rest,
77
78
81 *FLASH_OBJ_PTR_AT( b1 ) );
82
85 *FLASH_OBJ_PTR_AT( b1 ) );
86
88 A21, p21, b1,
89 b2, nb_alg );
90
91 /*------------------------------------------------------------*/
92
93 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
94 A10, A11, /**/ A12,
95 /* ************** */ /* ****************** */
96 &ABL, /**/ &ABR, A20, A21, /**/ A22,
97 FLA_TL );
98
99 FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR, p00, p01, /**/ p02,
100 p10, p11, /**/ p12,
101 /* ************** */ /* ****************** */
102 &pBL, /**/ &pBR, p20, p21, /**/ p22,
103 FLA_TL );
104
105 FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR, L00, L01, /**/ L02,
106 L10, L11, /**/ L12,
107 /* ************** */ /* ****************** */
108 &LBL, /**/ &LBR, L20, L21, /**/ L22,
109 FLA_TL );
110
112 b1,
113 /* ** */ /* ** */
114 &bB, b2, FLA_TOP );
115 }
116
117 return FLA_SUCCESS;
118}
FLA_Error FLASH_FS_incpiv_aux2(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
Definition FLASH_FS_incpiv_aux2.c:13
FLA_Error FLA_Trsv_external(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x)
Definition FLA_Trsv_external.c:13
FLA_Error FLA_Apply_pivots(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
Definition FLA_Apply_pivots.c:15

References FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsv_external(), FLASH_FS_incpiv_aux2(), and i.

Referenced by FLASH_FS_incpiv().

◆ FLASH_FS_incpiv_aux2()

FLA_Error FLASH_FS_incpiv_aux2 ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg 
)
16{
17 FLA_Obj LT, L0,
18 LB, L1,
19 L2;
20
21 FLA_Obj DT, D0,
22 DB, D1,
23 D2;
24
25 FLA_Obj pT, p0,
26 pB, p1,
27 p2;
28
29 FLA_Obj ET, E0,
30 EB, E1,
31 E2;
32
33 FLA_Part_2x1( L, &LT,
34 &LB, 0, FLA_TOP );
35
36 FLA_Part_2x1( D, &DT,
37 &DB, 0, FLA_TOP );
38
39 FLA_Part_2x1( p, &pT,
40 &pB, 0, FLA_TOP );
41
42 FLA_Part_2x1( E, &ET,
43 &EB, 0, FLA_TOP );
44
45 while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
46 {
48 /* ** */ /* ** */
49 &L1,
50 LB, &L2, 1, FLA_BOTTOM );
51
53 /* ** */ /* ** */
54 &D1,
55 DB, &D2, 1, FLA_BOTTOM );
56
58 /* ** */ /* ** */
59 &p1,
60 pB, &p2, 1, FLA_BOTTOM );
61
63 /* ** */ /* ** */
64 &E1,
65 EB, &E2, 1, FLA_BOTTOM );
66
67 /*------------------------------------------------------------*/
68
74 nb_alg );
75
76 /*------------------------------------------------------------*/
77
79 L1,
80 /* ** */ /* ** */
81 &LB, L2, FLA_TOP );
82
84 D1,
85 /* ** */ /* ** */
86 &DB, D2, FLA_TOP );
87
89 p1,
90 /* ** */ /* ** */
91 &pB, p2, FLA_TOP );
92
94 E1,
95 /* ** */ /* ** */
96 &EB, E2, FLA_TOP );
97 }
98
99 return FLA_SUCCESS;
100}
FLA_Error FLA_SA_FS_blk(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
Definition FLA_SA_FS_blk.c:13

References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), FLA_SA_FS_blk(), and i.

Referenced by FLASH_FS_incpiv_aux1().

◆ FLASH_LU_incpiv_var1()

FLA_Error FLASH_LU_incpiv_var1 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg,
fla_lu_t * cntl 
)
14{
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18
20 pBL, pBR, p10, p11, p12,
21 p20, p21, p22;
22
24 LBL, LBR, L10, L11, L12,
25 L20, L21, L22;
26
27 FLA_Part_2x2( A, &ATL, &ATR,
28 &ABL, &ABR, 0, 0, FLA_TL );
29
30 FLA_Part_2x2( p, &pTL, &pTR,
31 &pBL, &pBR, 0, 0, FLA_TL );
32
33 FLA_Part_2x2( L, &LTL, &LTR,
34 &LBL, &LBR, 0, 0, FLA_TL );
35
36 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
37 FLA_Obj_width( ATL ) < FLA_Obj_width( A ) )
38 {
39 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
40 /* ************* */ /* ******************** */
41 &A10, /**/ &A11, &A12,
42 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
43 1, 1, FLA_BR );
44
45 FLA_Repart_2x2_to_3x3( pTL, /**/ pTR, &p00, /**/ &p01, &p02,
46 /* ************* */ /* ******************** */
47 &p10, /**/ &p11, &p12,
48 pBL, /**/ pBR, &p20, /**/ &p21, &p22,
49 1, 1, FLA_BR );
50
51 FLA_Repart_2x2_to_3x3( LTL, /**/ LTR, &L00, /**/ &L01, &L02,
52 /* ************* */ /* ******************** */
53 &L10, /**/ &L11, &L12,
54 LBL, /**/ LBR, &L20, /**/ &L21, &L22,
55 1, 1, FLA_BR );
56
57 /*------------------------------------------------------------*/
58
59 if ( FLASH_Queue_get_enabled() )
60 {
61 // Enqueue
64 FLA_Cntl_sub_lu( cntl ) );
65 }
66 else
67 {
68 // Execute leaf
71 FLA_Cntl_sub_lu( cntl ) );
72 }
73
75 FLA_Cntl_sub_trsm1( cntl ) );
76
78 A21, A22, p21, L21, nb_alg, cntl );
79
80 /*------------------------------------------------------------*/
81
82 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
83 A10, A11, /**/ A12,
84 /* ************** */ /* ****************** */
85 &ABL, /**/ &ABR, A20, A21, /**/ A22,
86 FLA_TL );
87
88 FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR, p00, p01, /**/ p02,
89 p10, p11, /**/ p12,
90 /* ************** */ /* ****************** */
91 &pBL, /**/ &pBR, p20, p21, /**/ p22,
92 FLA_TL );
93
94 FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR, L00, L01, /**/ L02,
95 L10, L11, /**/ L12,
96 /* ************** */ /* ****************** */
97 &LBL, /**/ &LBR, L20, L21, /**/ L22,
98 FLA_TL );
99 }
100
101 return FLA_SUCCESS;
102}
FLA_Bool FLASH_Queue_get_enabled(void)
Definition FLASH_Queue.c:171
FLA_Error FLASH_SA_LU(FLA_Obj B, FLA_Obj C, FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
Definition FLASH_SA_LU.c:13
FLA_Error FLASH_Trsm_piv(FLA_Obj A, FLA_Obj B, FLA_Obj p, fla_trsm_t *cntl)
Definition FLASH_Trsm_piv.c:13
FLA_Error FLA_LU_piv_task(FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
Definition FLA_LU_piv_task.c:15

References FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), FLASH_Trsm_piv(), and i.

Referenced by FLASH_LU_incpiv_noopt().

◆ FLASH_LU_incpiv_var2()

FLA_Error FLASH_LU_incpiv_var2 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
FLA_Obj  U,
dim_t  nb_alg,
fla_lu_t * cntl 
)
14{
16 ABL, ABR, A10, A11, A12,
17 A20, A21, A22;
18
20 pBL, pBR, p10, p11, p12,
21 p20, p21, p22;
22
24 LBL, LBR, L10, L11, L12,
25 L20, L21, L22;
26
27 FLA_Obj UL, UR, U0, U1, U2;
28
29 FLA_Part_2x2( A, &ATL, &ATR,
30 &ABL, &ABR, 0, 0, FLA_TL );
31
32 FLA_Part_2x2( p, &pTL, &pTR,
33 &pBL, &pBR, 0, 0, FLA_TL );
34
35 FLA_Part_2x2( L, &LTL, &LTR,
36 &LBL, &LBR, 0, 0, FLA_TL );
37
38 FLA_Part_1x2( U, &UL, &UR, 0, FLA_LEFT );
39
40 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
41 FLA_Obj_width( ATL ) < FLA_Obj_width( A ) )
42 {
43 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
44 /* ************* */ /* ******************** */
45 &A10, /**/ &A11, &A12,
46 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
47 1, 1, FLA_BR );
48
49 FLA_Repart_2x2_to_3x3( pTL, /**/ pTR, &p00, /**/ &p01, &p02,
50 /* ************* */ /* ******************** */
51 &p10, /**/ &p11, &p12,
52 pBL, /**/ pBR, &p20, /**/ &p21, &p22,
53 1, 1, FLA_BR );
54
55 FLA_Repart_2x2_to_3x3( LTL, /**/ LTR, &L00, /**/ &L01, &L02,
56 /* ************* */ /* ******************** */
57 &L10, /**/ &L11, &L12,
58 LBL, /**/ LBR, &L20, /**/ &L21, &L22,
59 1, 1, FLA_BR );
60
61 FLA_Repart_1x2_to_1x3( UL, /**/ UR, &U0, /**/ &U1, &U2,
62 1, FLA_RIGHT );
63
64 /*------------------------------------------------------------*/
65
66 if ( FLASH_Queue_get_enabled() )
67 {
68 // Enqueue
72 FLA_Cntl_sub_lu( cntl ) );
73 }
74 else
75 {
76 // Execute leaf
80 FLA_Cntl_sub_lu( cntl ) );
81 }
82
84 FLA_Cntl_sub_trsm1( cntl ) );
85
87 A21, A22, p21, L21, nb_alg, cntl );
88
89 /*------------------------------------------------------------*/
90
91 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
92 A10, A11, /**/ A12,
93 /* ************** */ /* ****************** */
94 &ABL, /**/ &ABR, A20, A21, /**/ A22,
95 FLA_TL );
96
97 FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR, p00, p01, /**/ p02,
98 p10, p11, /**/ p12,
99 /* ************** */ /* ****************** */
100 &pBL, /**/ &pBR, p20, p21, /**/ p22,
101 FLA_TL );
102
103 FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR, L00, L01, /**/ L02,
104 L10, L11, /**/ L12,
105 /* ************** */ /* ****************** */
106 &LBL, /**/ &LBR, L20, L21, /**/ L22,
107 FLA_TL );
108
109 FLA_Cont_with_1x3_to_1x2( &UL, /**/ &UR, U0, U1, /**/ U2,
110 FLA_LEFT );
111 }
112
113 return FLA_SUCCESS;
114}
FLA_Error FLA_LU_piv_copy_task(FLA_Obj A, FLA_Obj p, FLA_Obj U, fla_lu_t *cntl)
Definition FLA_LU_piv_copy_task.c:13

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_copy_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), FLASH_Trsm_piv(), and i.

Referenced by FLASH_LU_incpiv_opt1().

◆ FLASH_SA_FS()

FLA_Error FLASH_SA_FS ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg,
fla_gemm_t * cntl 
)
16{
17 FLA_Obj CL, CR, C0, C1, C2;
18
19 FLA_Obj EL, ER, E0, E1, E2;
20
21 FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );
22
23 FLA_Part_1x2( E, &EL, &ER, 0, FLA_LEFT );
24
25 while ( FLA_Obj_width( CL ) < FLA_Obj_width( C ) )
26 {
27 FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2,
28 1, FLA_RIGHT );
29
30 FLA_Repart_1x2_to_1x3( EL, /**/ ER, &E0, /**/ &E1, &E2,
31 1, FLA_RIGHT );
32
33 /*------------------------------------------------------------*/
34
35 if ( FLASH_Queue_get_enabled() )
36 {
37 // Enqueue
43 nb_alg,
44 FLA_Cntl_sub_gemm( cntl ) );
45 }
46 else
47 {
48 // Execute leaf
54 nb_alg,
55 FLA_Cntl_sub_gemm( cntl ) );
56 }
57
58 /*------------------------------------------------------------*/
59
60 FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2,
61 FLA_LEFT );
62
63 FLA_Cont_with_1x3_to_1x2( &EL, /**/ &ER, E0, E1, /**/ E2,
64 FLA_LEFT );
65 }
66
67 return FLA_SUCCESS;
68}
FLA_Error FLA_SA_FS_task(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg, fla_gemm_t *cntl)
Definition FLA_SA_FS_task.c:13

References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_SA_FS_task(), FLASH_Queue_get_enabled(), and i.

Referenced by FLASH_SA_LU().

◆ FLASH_SA_LU()

FLA_Error FLASH_SA_LU ( FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  D,
FLA_Obj  E,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg,
fla_lu_t * cntl 
)
15{
16 FLA_Obj DT, D0,
17 DB, D1,
18 D2;
19
20 FLA_Obj ET, E0,
21 EB, E1,
22 E2;
23
24 FLA_Obj pT, p0,
25 pB, p1,
26 p2;
27
28 FLA_Obj LT, L0,
29 LB, L1,
30 L2;
31
32 FLA_Part_2x1( D, &DT,
33 &DB, 0, FLA_TOP );
34
35 FLA_Part_2x1( E, &ET,
36 &EB, 0, FLA_TOP );
37
38 FLA_Part_2x1( p, &pT,
39 &pB, 0, FLA_TOP );
40
41 FLA_Part_2x1( L, &LT,
42 &LB, 0, FLA_TOP );
43
44 while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
45 {
47 /* ** */ /* ** */
48 &D1,
49 DB, &D2, 1, FLA_BOTTOM );
50
52 /* ** */ /* ** */
53 &E1,
54 EB, &E2, 1, FLA_BOTTOM );
55
57 /* ** */ /* ** */
58 &p1,
59 pB, &p2, 1, FLA_BOTTOM );
60
62 /* ** */ /* ** */
63 &L1,
64 LB, &L2, 1, FLA_BOTTOM );
65
66 /*------------------------------------------------------------*/
67
68 if ( FLASH_Queue_get_enabled() )
69 {
70 // Enqueue
75 nb_alg,
76 FLA_Cntl_sub_lu( cntl ) );
77 }
78 else
79 {
80 // Execute leaf
85 nb_alg,
86 FLA_Cntl_sub_lu( cntl ) );
87 }
88
90 D1, p1, C,
91 E1, nb_alg, FLA_Cntl_sub_gemm1( cntl ) );
92
93 /*------------------------------------------------------------*/
94
96 D1,
97 /* ** */ /* ** */
98 &DB, D2, FLA_TOP );
99
101 E1,
102 /* ** */ /* ** */
103 &EB, E2, FLA_TOP );
104
106 p1,
107 /* ** */ /* ** */
108 &pB, p2, FLA_TOP );
109
111 L1,
112 /* ** */ /* ** */
113 &LB, L2, FLA_TOP );
114 }
115
116 return FLA_SUCCESS;
117}
FLA_Error FLASH_SA_FS(FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg, fla_gemm_t *cntl)
Definition FLASH_SA_FS.c:13
FLA_Error FLA_SA_LU_task(FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
Definition FLA_SA_LU_task.c:13

References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), FLA_SA_LU_task(), FLASH_Queue_get_enabled(), FLASH_SA_FS(), and i.

Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().

◆ FLASH_Trsm_piv()

FLA_Error FLASH_Trsm_piv ( FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  p,
fla_trsm_t * cntl 
)
14{
15 FLA_Obj BL, BR, B0, B1, B2;
16
17 FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
18
19 while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
20 {
21 FLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, /**/ &B1, &B2,
22 1, FLA_RIGHT );
23
24 /*------------------------------------------------------------*/
25
26 if ( FLASH_Queue_get_enabled() )
27 {
28 // Enqueue
32 FLA_Cntl_sub_trsm( cntl ) );
33 }
34 else
35 {
36 // Execute leaf
40 FLA_Cntl_sub_trsm( cntl ) );
41 }
42
43 /*------------------------------------------------------------*/
44
45 FLA_Cont_with_1x3_to_1x2( &BL, /**/ &BR, B0, B1, /**/ B2,
46 FLA_LEFT );
47 }
48
49 return FLA_SUCCESS;
50}
FLA_Error FLA_Trsm_piv_task(FLA_Obj A, FLA_Obj B, FLA_Obj p, fla_trsm_t *cntl)
Definition FLA_Trsm_piv_task.c:13

References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_Trsm_piv_task(), FLASH_Queue_get_enabled(), and i.

Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().