libflame revision_anchor
Functions
FLA_LU_piv_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_LU_piv_blk_var3 (FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
 
FLA_Error FLA_LU_piv_blk_var4 (FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
 
FLA_Error FLA_LU_piv_blk_var5 (FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
 
FLA_Error FLA_LU_piv_unb_var3 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_unb_var3b (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_unb_var4 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_unb_var5 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_opt_var3 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var3 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var3 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var3 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var3 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opt_var4 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var4 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var4 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var4 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var4 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opt_var5 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var5 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var5 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var5 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var5 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 

Function Documentation

◆ FLA_LU_piv_blk_var3()

FLA_Error FLA_LU_piv_blk_var3 ( FLA_Obj  A,
FLA_Obj  p,
fla_lu_t * cntl 
)
16{
18 FLA_Obj ATL, ATR, A00, A01, A02,
19 ABL, ABR, A10, A11, A12,
20 A20, A21, A22;
21
22 FLA_Obj AL, AR, A0, A1, A2;
23
24 FLA_Obj pT, p0,
25 pB, p1,
26 p2;
27
29
30 dim_t b;
31
32
33 FLA_Part_2x2( A, &ATL, &ATR,
34 &ABL, &ABR, 0, 0, FLA_TL );
35
36 FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
37
38 FLA_Part_2x1( p, &pT,
39 &pB, 0, FLA_TOP );
40
41 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
43
45
46 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
47 /* ************* */ /* ******************** */
48 &A10, /**/ &A11, &A12,
49 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
50 b, b, FLA_BR );
51
52 FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &A1, &A2,
53 b, FLA_RIGHT );
54
56 /* ** */ /* ** */
57 &p1,
58 pB, &p2, b, FLA_BOTTOM );
59
60 /*------------------------------------------------------------*/
61
62 // Apply previously computed pivots
64 FLA_Cntl_sub_appiv1( cntl ) );
65
66 // A01 = trilu( A00 ) \ A01
69 FLA_ONE, A00, A01,
70 FLA_Cntl_sub_trsm1( cntl ) );
71
72 // A11 = A11 - A10 * A01
75 FLA_Cntl_sub_gemm1( cntl ) );
76
77 // A21 = A21 - A20 * A01
80 FLA_Cntl_sub_gemm2( cntl ) );
81
82 // AB1 = / A11 \
83 // \ A21 /
85 A21, &AB1 );
86
87 // AB1, p1 = LU_piv( AB1 )
89 FLA_Cntl_sub_lu( cntl ) );
90
91 // If the unblocked algorithm returns a null pivot,
92 // update the pivot index and return it.
93 if ( r_val == FLA_SUCCESS && r_val_sub >= 0 )
94 {
96 }
97
98 // AB0 = / A10 \
99 // \ A20 /
101 A20, &AB0 );
102
103 // Apply pivots to previous columns
105 FLA_Cntl_sub_appiv2( cntl ) );
106
107 /*------------------------------------------------------------*/
108
109 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
110 A10, A11, /**/ A12,
111 /* ************** */ /* ****************** */
112 &ABL, /**/ &ABR, A20, A21, /**/ A22,
113 FLA_TL );
114
115 FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, A1, /**/ A2,
116 FLA_LEFT );
117
119 p1,
120 /* ** */ /* ** */
121 &pB, p2, FLA_TOP );
122
123 }
124
125 if ( FLA_Obj_width( ATR ) > 0 )
126 {
127 /* Apply pivots to untouched columns */
129 FLA_Cntl_sub_appiv1( cntl ) );
130
131 /* ATR = trilu( ATL ) \ ATR */
134 FLA_ONE, ATL, ATR );
135 }
136
137 return r_val;
138}
FLA_Error FLA_Apply_pivots_internal(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
Definition FLA_Apply_pivots_internal.c:13
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition FLA_Gemm_internal.c:16
FLA_Error FLA_LU_piv_internal(FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
Definition FLA_LU_piv_internal.c:15
FLA_Error FLA_Trsm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition FLA_Trsm_internal.c:16
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition FLA_Trsm_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition FLA_Init.c:22
FLA_Obj FLA_ONE
Definition FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:17
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:226
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition FLA_View.c:475
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition FLA_View.c:267
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition FLA_Blocksize.c:234
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition FLA_View.c:541
int FLA_Error
Definition FLA_type_defs.h:47
unsigned long dim_t
Definition FLA_type_defs.h:71
int i
Definition bl1_axmyv2.c:145
Definition FLA_type_defs.h:159

References FLA_Apply_pivots_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_piv_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), FLA_Trsm_internal(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_blk_var4()

FLA_Error FLA_LU_piv_blk_var4 ( FLA_Obj  A,
FLA_Obj  p,
fla_lu_t * cntl 
)
16{
18 FLA_Obj ATL, ATR, A00, A01, A02,
19 ABL, ABR, A10, A11, A12,
20 A20, A21, A22;
21
22 FLA_Obj pT, p0,
23 pB, p1,
24 p2;
25
27
28 dim_t b;
29
30
31 FLA_Part_2x2( A, &ATL, &ATR,
32 &ABL, &ABR, 0, 0, FLA_TL );
33
34 FLA_Part_2x1( p, &pT,
35 &pB, 0, FLA_TOP );
36
37 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
39
41
42 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
43 /* ************* */ /* ******************** */
44 &A10, /**/ &A11, &A12,
45 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
46 b, b, FLA_BR );
47
49 /* ** */ /* ** */
50 &p1,
51 pB, &p2, b, FLA_BOTTOM );
52
53 /*------------------------------------------------------------*/
54
55 // A11 = A11 - A10 * A01
58 FLA_Cntl_sub_gemm1( cntl ) );
59
60 // A21 = A21 - A20 * A01
63 FLA_Cntl_sub_gemm3( cntl ) );
64
65 // AB1 = / A11 \
66 // \ A21 /
68 A21, &AB1 );
69
70 // AB1, p1 = LU_piv( AB1 )
72 FLA_Cntl_sub_lu( cntl ) );
73
74 // If the unblocked algorithm returns a null pivot,
75 // update the pivot index and return it.
76 if ( r_val == FLA_SUCCESS && r_val_sub >= 0 )
77 {
79 }
80
81 // AB0 = / A10 \
82 // \ A20 /
84 A20, &AB0 );
85
86 // AB2 = / A12 \
87 // \ A22 /
89 A22, &AB2 );
90
91 // Apply pivots to remaining columns
93 FLA_Cntl_sub_appiv1( cntl ) );
95 FLA_Cntl_sub_appiv1( cntl ) );
96
97 // A12 = A12 - A10 * A02
100 FLA_Cntl_sub_gemm2( cntl ) );
101
102 // A12 = trilu( A11 ) \ A12
105 FLA_ONE, A11, A12,
106 FLA_Cntl_sub_trsm1( cntl ) );
107
108 /*------------------------------------------------------------*/
109
110 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
111 A10, A11, /**/ A12,
112 /* ************** */ /* ****************** */
113 &ABL, /**/ &ABR, A20, A21, /**/ A22,
114 FLA_TL );
115
117 p1,
118 /* ** */ /* ** */
119 &pB, p2, FLA_TOP );
120
121 }
122
123 return r_val;
124}

References FLA_Apply_pivots_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_piv_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_internal(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_blk_var5()

FLA_Error FLA_LU_piv_blk_var5 ( FLA_Obj  A,
FLA_Obj  p,
fla_lu_t * cntl 
)
14{
16 FLA_Obj ATL, ATR, A00, A01, A02,
17 ABL, ABR, A10, A11, A12,
18 A20, A21, A22;
19
20 FLA_Obj pT, p0,
21 pB, p1,
22 p2;
23
25
26 dim_t b;
27
28
29 FLA_Part_2x2( A, &ATL, &ATR,
30 &ABL, &ABR, 0, 0, FLA_TL );
31
32 FLA_Part_2x1( p, &pT,
33 &pB, 0, FLA_TOP );
34
35 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
37
39
40 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
41 /* ************* */ /* ******************** */
42 &A10, /**/ &A11, &A12,
43 ABL, /**/ ABR, &A20, /**/ &A21, &A22,
44 b, b, FLA_BR );
45
47 /* ** */ /* ** */
48 &p1,
49 pB, &p2, b, FLA_BOTTOM );
50
51 /*------------------------------------------------------------*/
52
53 // AB1 = / A11 \
54 // \ A21 /
56 A21, &AB1 );
57
58 // AB1, p1 = LU_piv( AB1 )
60 FLA_Cntl_sub_lu( cntl ) );
61
62 // If the unblocked algorithm returns a null pivot,
63 // update the pivot index and return it.
64 if ( r_val == FLA_SUCCESS && r_val_sub >= 0 )
65 {
67 }
68
69 // AB0 = / A10 \
70 // \ A20 /
72 A20, &AB0 );
73
74 // Apply computed pivots to AB0
76 FLA_Cntl_sub_appiv1( cntl ) );
77
78 // AB2 = / A12 \
79 // \ A22 /
81 A22, &AB2 );
82
83 // Apply computed pivots to AB2
85 FLA_Cntl_sub_appiv1( cntl ) );
86
87 // A12 = trilu( A11 ) \ A12
90 FLA_ONE, A11, A12,
91 FLA_Cntl_sub_trsm1( cntl ) );
92
93 // A22 = A22 - A21 * A12
96 FLA_Cntl_sub_gemm1( cntl ) );
97
98 /*------------------------------------------------------------*/
99
100 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
101 A10, A11, /**/ A12,
102 /* ************** */ /* ****************** */
103 &ABL, /**/ &ABR, A20, A21, /**/ A22,
104 FLA_TL );
105
107 p1,
108 /* ** */ /* ** */
109 &pB, p2, FLA_TOP );
110
111 }
112
113 return r_val;
114}

References FLA_Apply_pivots_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_piv_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_internal(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opc_var3()

FLA_Error FLA_LU_piv_opc_var3 ( int  m_A,
int  n_A,
scomplex * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
371{
375 int min_m_n = min( m_A, n_A );
376 int i;
377
378 for ( i = 0; i < min_m_n; ++i )
379 {
381 scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
382 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
383 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
384 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
385 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
386 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
387
388 scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
389
390 int* p0 = buff_p;
391 int* pi1 = buff_p + i*inc_p;
392
393 int m_ahead = m_A - i - 1;
394 int mn_behind = i;
395
396 /*------------------------------------------------------------*/
397
398 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
400 a1, rs_A, cs_A,
401 0,
402 mn_behind - 1,
403 p0, inc_p );
404
405 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
409 mn_behind,
410 A00, rs_A, cs_A,
411 a01, rs_A );
412
413 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
415 mn_behind,
416 buff_m1,
417 a10t, cs_A,
418 a01, rs_A,
419 buff_1,
420 alpha11 );
421
422 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
425 m_ahead,
426 mn_behind,
427 buff_m1,
428 A20, rs_A, cs_A,
429 a01, rs_A,
430 buff_1,
431 a21, rs_A );
432
433 // FLA_Merge_2x1( alpha11,
434 // a21, &aB1 );
435
436 // FLA_Amax_external( aB1, pi1 );
437 bl1_camax( m_ahead + 1,
438 alpha11, rs_A,
439 pi1 );
440
441 // If a null pivot is encountered, return the index.
442 pivot_val =*(alpha11 + *pi1);
443 if ( pivot_val.real == czero.real &&
444 pivot_val.imag == czero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
445 else
446 {
447 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
449 alpha11, rs_A, cs_A,
450 0,
451 0,
452 pi1, inc_p );
453
454 // FLA_Inv_scal_external( alpha11, a21 );
456 m_ahead,
457 alpha11,
458 a21, rs_A );
459
460 // FLA_Merge_2x1( a10t,
461 // A20, &AB0 );
462
463 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
465 a10t, rs_A, cs_A,
466 0,
467 0,
468 pi1, inc_p );
469 }
470 /*------------------------------------------------------------*/
471
472 }
473
474 if ( m_A < n_A )
475 {
478
479 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
481 ATR, rs_A, cs_A,
482 0,
483 m_A - 1,
484 buff_p, inc_p );
485
486 // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
487 // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
488 // FLA_ONE, ATL, ATR );
493 m_A,
494 n_A - m_A,
495 buff_1,
496 ATL, rs_A, cs_A,
497 ATR, rs_A, cs_A );
498 }
499
500 return r_val;
501}
FLA_Error FLA_Apply_pivots_ln_opc_var1(int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:356
const scomplex czero
Definition FLA_Init.c:38
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition bl1_amax.c:35
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition bl1_dots.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition bl1_invscalv.c:52
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition bl1_trsm.c:219
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition bl1_trsv.c:99
@ BLIS1_LOWER_TRIANGULAR
Definition blis_type_defs.h:62
@ BLIS1_UNIT_DIAG
Definition blis_type_defs.h:75
@ BLIS1_NO_TRANSPOSE
Definition blis_type_defs.h:54
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
@ BLIS1_LEFT
Definition blis_type_defs.h:68
Definition blis_type_defs.h:133
float imag
Definition blis_type_defs.h:134
float real
Definition blis_type_defs.h:134

References bl1_camax(), bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, FLA_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opc_var4()

FLA_Error FLA_LU_piv_opc_var4 ( int  m_A,
int  n_A,
scomplex * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
346{
350 int min_m_n = min( m_A, n_A );
351 int i, is_null_pivot;
352
353 for ( i = 0; i < min_m_n; ++i )
354 {
356 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
357 scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
358 scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
359 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
360 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
361 scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
362 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
363
364 int* pi1 = buff_p + i*inc_p;
365
366 int m_ahead = m_A - i - 1;
367 int n_ahead = n_A - i - 1;
368 int mn_behind = i;
369
370 /*------------------------------------------------------------*/
371
372 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
374 mn_behind,
375 buff_m1,
376 a10t, cs_A,
377 a01, rs_A,
378 buff_1,
379 alpha11 );
380
381 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
384 m_ahead,
385 mn_behind,
386 buff_m1,
387 A20, rs_A, cs_A,
388 a01, rs_A,
389 buff_1,
390 a21, rs_A );
391
392 // FLA_Merge_2x1( alpha11,
393 // a21, &aB1 );
394
395 // FLA_Amax_external( aB1, pi1 );
396 bl1_camax( m_ahead + 1,
397 alpha11, rs_A,
398 pi1 );
399
400 // If a null pivot is encountered, return the index.
401 pivot_val =*(alpha11 + *pi1);
402
403 is_null_pivot = (pivot_val.real == czero.real && pivot_val.imag == czero.imag);
404 if ( is_null_pivot )
405 {
406 r_val = ( r_val == FLA_SUCCESS ? i : r_val );
407 }
408 else
409 {
410 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
412 alpha11, rs_A, cs_A,
413 0,
414 0,
415 pi1, inc_p );
416
417 // FLA_Merge_2x1( a10t,
418 // A20, &AB0 );
419
420 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
422 a10t, rs_A, cs_A,
423 0,
424 0,
425 pi1, inc_p );
426
427 // FLA_Merge_2x1( a12t,
428 // A22, &AB2 );
429
430 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
432 a12t, rs_A, cs_A,
433 0,
434 0,
435 pi1, inc_p );
436 }
437
438 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
441 mn_behind,
442 n_ahead,
443 buff_m1,
444 A02, rs_A, cs_A,
445 a10t, cs_A,
446 buff_1,
447 a12t, cs_A );
448
449 if ( ! is_null_pivot )
450 {
451 // FLA_Inv_scal_external( alpha11, a21 );
453 m_ahead,
454 alpha11,
455 a21, rs_A );
456 }
457 /*------------------------------------------------------------*/
458
459 }
460
461 return r_val;
462}
@ BLIS1_TRANSPOSE
Definition blis_type_defs.h:55

References bl1_camax(), bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, FLA_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_opc_var5()

FLA_Error FLA_LU_piv_opc_var5 ( int  m_A,
int  n_A,
scomplex * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
243{
246 int min_m_n = min( m_A, n_A );
247 int i;
248
249 for ( i = 0; i < min_m_n; ++i )
250 {
252 scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
253 scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
254 scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
255 scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
256 scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
257
258 int* pi1 = buff_p + i*inc_p;
259
260 int m_ahead = m_A - i - 1;
261 int n_ahead = n_A - i - 1;
262
263 /*------------------------------------------------------------*/
264
265 // FLA_Merge_2x1( alpha11,
266 // a21, &aB1 );
267
268 // FLA_Amax_external( aB1, pi1 );
269 bl1_camax( m_ahead + 1,
270 alpha11, rs_A,
271 pi1 );
272
273 // If a null pivot is encountered, return the index.
274 pivot_val = *(alpha11 + *pi1);
275 if ( pivot_val.real == czero.real &&
276 pivot_val.imag == czero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
277 else
278 {
279 // FLA_Merge_1x2( ABL, ABR, &AB );
280
281 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
283 a10t, rs_A, cs_A,
284 0,
285 0,
286 pi1, inc_p );
287
288 // FLA_Inv_scal_external( alpha11, a21 );
290 m_ahead,
291 alpha11,
292 a21, rs_A );
293 }
294
295 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
298 m_ahead,
299 n_ahead,
300 buff_m1,
301 a21, rs_A,
302 a12t, cs_A,
303 A22, rs_A, cs_A );
304
305 /*------------------------------------------------------------*/
306
307 }
308
309 return r_val;
310}
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:111

References bl1_camax(), bl1_cger(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opd_var3()

FLA_Error FLA_LU_piv_opd_var3 ( int  m_A,
int  n_A,
double * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
234{
236 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
238 int min_m_n = min( m_A, n_A );
239 int i;
240
241 for ( i = 0; i < min_m_n; ++i )
242 {
243 double pivot_val = dzero;
244 double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
245 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
246 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
247 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
248 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
249 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
250
251 double* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
252
253 int* p0 = buff_p;
254 int* pi1 = buff_p + i*inc_p;
255
256 int m_ahead = m_A - i - 1;
257 int mn_behind = i;
258
259 /*------------------------------------------------------------*/
260
261 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
263 a1, rs_A, cs_A,
264 0,
265 mn_behind - 1,
266 p0, inc_p );
267
268 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
272 mn_behind,
273 A00, rs_A, cs_A,
274 a01, rs_A );
275
276 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
278 mn_behind,
279 buff_m1,
280 a10t, cs_A,
281 a01, rs_A,
282 buff_1,
283 alpha11 );
284
285 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
288 m_ahead,
289 mn_behind,
290 buff_m1,
291 A20, rs_A, cs_A,
292 a01, rs_A,
293 buff_1,
294 a21, rs_A );
295
296 // FLA_Merge_2x1( alpha11,
297 // a21, &aB1 );
298
299 // FLA_Amax_external( aB1, pi1 );
300 bl1_damax( m_ahead + 1,
301 alpha11, rs_A,
302 pi1 );
303
304 // If a null pivot is encountered, return the index.
305 pivot_val =*(alpha11 + *pi1);
306 if ( pivot_val == dzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
307 else
308 {
309 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
311 alpha11, rs_A, cs_A,
312 0,
313 0,
314 pi1, inc_p );
315
316 // FLA_Inv_scal_external( alpha11, a21 );
318 m_ahead,
319 alpha11,
320 a21, rs_A );
321
322 // FLA_Merge_2x1( a10t,
323 // A20, &AB0 );
324
325 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
327 a10t, rs_A, cs_A,
328 0,
329 0,
330 pi1, inc_p );
331 }
332 /*------------------------------------------------------------*/
333
334 }
335
336 if ( m_A < n_A )
337 {
338 double* ATL = buff_A;
339 double* ATR = buff_A + m_A*cs_A;
340
341 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
343 ATR, rs_A, cs_A,
344 0,
345 m_A - 1,
346 buff_p, inc_p );
347
348 // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
349 // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
350 // FLA_ONE, ATL, ATR );
355 m_A,
356 n_A - m_A,
357 buff_1,
358 ATL, rs_A, cs_A,
359 ATR, rs_A, cs_A );
360 }
361
362 return r_val;
363}
FLA_Error FLA_Apply_pivots_ln_opd_var1(int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:274
const double dzero
Definition FLA_Init.c:37
void bl1_damax(int n, double *x, int incx, int *index)
Definition bl1_amax.c:24
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition bl1_dots.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_invscalv.c:26
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition bl1_trsm.c:116
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition bl1_trsv.c:56

References bl1_damax(), bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opd_var4()

FLA_Error FLA_LU_piv_opd_var4 ( int  m_A,
int  n_A,
double * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
222{
224 double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
226 int min_m_n = min( m_A, n_A );
227 int i, is_null_pivot;
228
229 for ( i = 0; i < min_m_n; ++i )
230 {
231 double pivot_val = dzero;
232 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
233 double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
234 double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
235 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
236 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
237 double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
238 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
239
240 int* pi1 = buff_p + i*inc_p;
241
242 int m_ahead = m_A - i - 1;
243 int n_ahead = n_A - i - 1;
244 int mn_behind = i;
245
246 /*------------------------------------------------------------*/
247
248 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
250 mn_behind,
251 buff_m1,
252 a10t, cs_A,
253 a01, rs_A,
254 buff_1,
255 alpha11 );
256
257 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
260 m_ahead,
261 mn_behind,
262 buff_m1,
263 A20, rs_A, cs_A,
264 a01, rs_A,
265 buff_1,
266 a21, rs_A );
267
268 // FLA_Merge_2x1( alpha11,
269 // a21, &aB1 );
270
271 // FLA_Amax_external( aB1, pi1 );
272 bl1_damax( m_ahead + 1,
273 alpha11, rs_A,
274 pi1 );
275
276 // If a null pivot is encountered, return the index.
277 pivot_val =*(alpha11 + *pi1);
278
280 if ( is_null_pivot )
281 {
282 r_val = ( r_val == FLA_SUCCESS ? i : r_val );
283 }
284 else
285 {
286 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
288 alpha11, rs_A, cs_A,
289 0,
290 0,
291 pi1, inc_p );
292
293 // FLA_Merge_2x1( a10t,
294 // A20, &AB0 );
295
296 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
298 a10t, rs_A, cs_A,
299 0,
300 0,
301 pi1, inc_p );
302
303 // FLA_Merge_2x1( a12t,
304 // A22, &AB2 );
305
306 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
308 a12t, rs_A, cs_A,
309 0,
310 0,
311 pi1, inc_p );
312 }
313
314 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
317 mn_behind,
318 n_ahead,
319 buff_m1,
320 A02, rs_A, cs_A,
321 a10t, cs_A,
322 buff_1,
323 a12t, cs_A );
324
325 if ( ! is_null_pivot )
326 {
327 // FLA_Inv_scal_external( alpha11, a21 );
329 m_ahead,
330 alpha11,
331 a21, rs_A );
332 }
333 /*------------------------------------------------------------*/
334
335 }
336
337 return r_val;
338}

References bl1_damax(), bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_opd_var5()

FLA_Error FLA_LU_piv_opd_var5 ( int  m_A,
int  n_A,
double * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
169{
172 int min_m_n = min( m_A, n_A );
173 int i;
174
175 for ( i = 0; i < min_m_n; ++i )
176 {
177 double pivot_val = dzero;
178 double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
179 double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180 double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181 double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
182 double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
183
184 int* pi1 = buff_p + i*inc_p;
185
186 int m_ahead = m_A - i - 1;
187 int n_ahead = n_A - i - 1;
188
189 /*------------------------------------------------------------*/
190
191 // FLA_Merge_2x1( alpha11,
192 // a21, &aB1 );
193
194 // FLA_Amax_external( aB1, pi1 );
195 bl1_damax( m_ahead + 1,
196 alpha11, rs_A,
197 pi1 );
198
199 // If a null pivot is encountered, return the index.
200 pivot_val = *(alpha11 + *pi1);
201 if ( pivot_val == dzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
202 else
203 {
204 // FLA_Merge_1x2( ABL, ABR, &AB );
205
206 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
208 a10t, rs_A, cs_A,
209 0,
210 0,
211 pi1, inc_p );
212
213 // FLA_Inv_scal_external( alpha11, a21 );
215 m_ahead,
216 alpha11,
217 a21, rs_A );
218 }
219
220 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
223 m_ahead,
224 n_ahead,
225 buff_m1,
226 a21, rs_A,
227 a12t, cs_A,
228 A22, rs_A, cs_A );
229
230 /*------------------------------------------------------------*/
231
232 }
233
234 return r_val;
235}
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition bl1_ger.c:62

References bl1_damax(), bl1_dger(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_ops_var3()

FLA_Error FLA_LU_piv_ops_var3 ( int  m_A,
int  n_A,
float * buff_A,
int  rs_A,
int  cs_A,
int * buff_p,
int  inc_p 
)
97{
99 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
101 int min_m_n = min( m_A, n_A );
102 int i;
103
104 for ( i = 0; i < min_m_n; ++i )
105 {
106 float pivot_val = fzero;
107 float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
108 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
109 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
110 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
111 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
112 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
113
114 float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
115
116 int* p0 = buff_p;
117 int* pi1 = buff_p + i*inc_p;
118
119 int m_ahead = m_A - i - 1;
120 int mn_behind = i;
121
122 /*------------------------------------------------------------*/
123
124 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
126 a1, rs_A, cs_A,
127 0,
128 mn_behind - 1,
129 p0, inc_p );
130
131 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
135 mn_behind,
136 A00, rs_A, cs_A,
137 a01, rs_A );
138
139 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
141 mn_behind,
142 buff_m1,
143 a10t, cs_A,
144 a01, rs_A,
145 buff_1,
146 alpha11 );
147
148 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
151 m_ahead,
152 mn_behind,
153 buff_m1,
154 A20, rs_A, cs_A,
155 a01, rs_A,
156 buff_1,
157 a21, rs_A );
158
159 // FLA_Merge_2x1( alpha11,
160 // a21, &aB1 );
161
162 // FLA_Amax_external( aB1, pi1 );
163 bl1_samax( m_ahead + 1,
164 alpha11, rs_A,
165 pi1 );
166
167 // If a null pivot is encountered, return the index.
168 pivot_val = *(alpha11 + *pi1);
169 if ( pivot_val == fzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
170 else
171 {
172 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
174 alpha11, rs_A, cs_A,
175 0,
176 0,
177 pi1, inc_p );
178
179 // FLA_Inv_scal_external( alpha11, a21 );
181 m_ahead,
182 alpha11,
183 a21, rs_A );
184
185 // FLA_Merge_2x1( a10t,
186 // A20, &AB0 );
187
188 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
190 a10t, rs_A, cs_A,
191 0,
192 0,
193 pi1, inc_p );
194 }
195 /*------------------------------------------------------------*/
196
197 }
198
199 if ( m_A < n_A )
200 {
201 float* ATL = buff_A;
202 float* ATR = buff_A + m_A*cs_A;
203
204 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
206 ATR, rs_A, cs_A,
207 0,
208 m_A - 1,
209 buff_p, inc_p );
210
211 // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
212 // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
213 // FLA_ONE, ATL, ATR );
218 m_A,
219 n_A - m_A,
220 buff_1,
221 ATL, rs_A, cs_A,
222 ATR, rs_A, cs_A );
223 }
224
225 return r_val;
226}
FLA_Error FLA_Apply_pivots_ln_ops_var1(int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:192
const float fzero
Definition FLA_Init.c:36
void bl1_samax(int n, float *x, int incx, int *index)
Definition bl1_amax.c:13
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition bl1_dots.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_invscalv.c:13
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition bl1_trsm.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition bl1_trsv.c:13

References bl1_samax(), bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), bl1_strsm(), bl1_strsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, FLA_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_ops_var4()

FLA_Error FLA_LU_piv_ops_var4 ( int  m_A,
int  n_A,
float *buff_A,
int  rs_A,
int  cs_A,
int *buff_p,
int  inc_p 
)
97{
99 float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
101 int min_m_n = min( m_A, n_A );
102 int i, is_null_pivot;
103
104
105 for ( i = 0; i < min_m_n; ++i )
106 {
107 float pivot_val = fzero;
108 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
109 float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
110 float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
111 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
112 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
113 float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
114 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
115
116 int* pi1 = buff_p + i*inc_p;
117
118 int m_ahead = m_A - i - 1;
119 int n_ahead = n_A - i - 1;
120 int mn_behind = i;
121
122 /*------------------------------------------------------------*/
123
124 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
126 mn_behind,
127 buff_m1,
128 a10t, cs_A,
129 a01, rs_A,
130 buff_1,
131 alpha11 );
132
133 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
136 m_ahead,
137 mn_behind,
138 buff_m1,
139 A20, rs_A, cs_A,
140 a01, rs_A,
141 buff_1,
142 a21, rs_A );
143
144 // FLA_Merge_2x1( alpha11,
145 // a21, &aB1 );
146
147 // FLA_Amax_external( aB1, pi1 );
148 bl1_samax( m_ahead + 1,
149 alpha11, rs_A,
150 pi1 );
151
152 // If a null pivot is encountered, return the index.
153 pivot_val = *(alpha11 + *pi1);
154
156 if ( is_null_pivot )
157 {
158 r_val = ( r_val == FLA_SUCCESS ? i : r_val );
159 }
160 else
161 {
162 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
164 alpha11, rs_A, cs_A,
165 0,
166 0,
167 pi1, inc_p );
168
169 // FLA_Merge_2x1( a10t,
170 // A20, &AB0 );
171
172 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
174 a10t, rs_A, cs_A,
175 0,
176 0,
177 pi1, inc_p );
178
179 // FLA_Merge_2x1( a12t,
180 // A22, &AB2 );
181
182 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
184 a12t, rs_A, cs_A,
185 0,
186 0,
187 pi1, inc_p );
188 }
189
190 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
193 mn_behind,
194 n_ahead,
195 buff_m1,
196 A02, rs_A, cs_A,
197 a10t, cs_A,
198 buff_1,
199 a12t, cs_A );
200
201 if ( ! is_null_pivot )
202 {
203 // FLA_Inv_scal_external( alpha11, a21 );
205 m_ahead,
206 alpha11,
207 a21, rs_A );
208 }
209 /*------------------------------------------------------------*/
210
211 }
212
213 return r_val;
214}

References bl1_samax(), bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, FLA_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_ops_var5()

FLA_Error FLA_LU_piv_ops_var5 ( int  m_A,
int  n_A,
float *buff_A,
int  rs_A,
int  cs_A,
int *buff_p,
int  inc_p 
)
95{
98 int min_m_n = min( m_A, n_A );
99 int i;
100
101 for ( i = 0; i < min_m_n; ++i )
102 {
103 float pivot_val = fzero;
104 float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
105 float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
106 float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
107 float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
108 float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
109
110 int* pi1 = buff_p + i*inc_p;
111
112 int m_ahead = m_A - i - 1;
113 int n_ahead = n_A - i - 1;
114
115 /*------------------------------------------------------------*/
116
117 // FLA_Merge_2x1( alpha11,
118 // a21, &aB1 );
119
120 // FLA_Amax_external( aB1, pi1 );
121 bl1_samax( m_ahead + 1,
122 alpha11, rs_A,
123 pi1 );
124
125 // If a null pivot is encountered, return the index.
126 pivot_val = *(alpha11 + *pi1);
127 if ( pivot_val == fzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
128 else
129 {
130 // FLA_Merge_1x2( ABL, ABR, &AB );
131
132 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
134 a10t, rs_A, cs_A,
135 0,
136 0,
137 pi1, inc_p );
138
139 // FLA_Inv_scal_external( alpha11, a21 );
141 m_ahead,
142 alpha11,
143 a21, rs_A );
144 }
145
146 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
149 m_ahead,
150 n_ahead,
151 buff_m1,
152 a21, rs_A,
153 a12t, cs_A,
154 A22, rs_A, cs_A );
155
156 /*------------------------------------------------------------*/
157
158 }
159
160 return r_val;
161}
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition bl1_ger.c:13

References bl1_samax(), bl1_sger(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opt_var3()

FLA_Error FLA_LU_piv_opt_var3 ( FLA_Obj  A,
FLA_Obj  p 
)
16{
18 FLA_Datatype datatype;
19 int m_A, n_A;
20 int rs_A, cs_A;
21 int inc_p;
22
23 datatype = FLA_Obj_datatype( A );
24
25 m_A = FLA_Obj_length( A );
26 n_A = FLA_Obj_width( A );
29
31
32
33 switch ( datatype )
34 {
35 case FLA_FLOAT:
36 {
37 float* buff_A = FLA_FLOAT_PTR( A );
38 int* buff_p = FLA_INT_PTR( p );
39
41 n_A,
43 buff_p, inc_p );
44
45 break;
46 }
47
48 case FLA_DOUBLE:
49 {
50 double* buff_A = FLA_DOUBLE_PTR( A );
51 int* buff_p = FLA_INT_PTR( p );
52
54 n_A,
56 buff_p, inc_p );
57
58 break;
59 }
60
61 case FLA_COMPLEX:
62 {
64 int* buff_p = FLA_INT_PTR( p );
65
67 n_A,
69 buff_p, inc_p );
70
71 break;
72 }
73
75 {
77 int* buff_p = FLA_INT_PTR( p );
78
80 n_A,
82 buff_p, inc_p );
83
84 break;
85 }
86 }
87
88 return r_val;
89}
FLA_Error FLA_LU_piv_opz_var3(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var3.c:505
FLA_Error FLA_LU_piv_opc_var3(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var3.c:367
FLA_Error FLA_LU_piv_opd_var3(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var3.c:230
FLA_Error FLA_LU_piv_ops_var3(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var3.c:93
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Datatype
Definition FLA_type_defs.h:49
Definition blis_type_defs.h:138

References FLA_LU_piv_opc_var3(), FLA_LU_piv_opd_var3(), FLA_LU_piv_ops_var3(), FLA_LU_piv_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opt_var4()

FLA_Error FLA_LU_piv_opt_var4 ( FLA_Obj  A,
FLA_Obj  p 
)
16{
18 FLA_Datatype datatype;
19 int m_A, n_A;
20 int rs_A, cs_A;
21 int inc_p;
22
23 datatype = FLA_Obj_datatype( A );
24
25 m_A = FLA_Obj_length( A );
26 n_A = FLA_Obj_width( A );
29
31
32
33 switch ( datatype )
34 {
35 case FLA_FLOAT:
36 {
37 float* buff_A = FLA_FLOAT_PTR( A );
38 int* buff_p = FLA_INT_PTR( p );
39
41 n_A,
43 buff_p, inc_p );
44
45 break;
46 }
47
48 case FLA_DOUBLE:
49 {
50 double* buff_A = FLA_DOUBLE_PTR( A );
51 int* buff_p = FLA_INT_PTR( p );
52
54 n_A,
56 buff_p, inc_p );
57
58 break;
59 }
60
61 case FLA_COMPLEX:
62 {
64 int* buff_p = FLA_INT_PTR( p );
65
67 n_A,
69 buff_p, inc_p );
70
71 break;
72 }
73
75 {
77 int* buff_p = FLA_INT_PTR( p );
78
80 n_A,
82 buff_p, inc_p );
83
84 break;
85 }
86 }
87
88 return r_val;
89}
FLA_Error FLA_LU_piv_opc_var4(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var4.c:342
FLA_Error FLA_LU_piv_ops_var4(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var4.c:93
FLA_Error FLA_LU_piv_opd_var4(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var4.c:218
FLA_Error FLA_LU_piv_opz_var4(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var4.c:466

References FLA_LU_piv_opc_var4(), FLA_LU_piv_opd_var4(), FLA_LU_piv_ops_var4(), FLA_LU_piv_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opt_var5()

FLA_Error FLA_LU_piv_opt_var5 ( FLA_Obj  A,
FLA_Obj  p 
)
14{
16 FLA_Datatype datatype;
17 int m_A, n_A;
18 int rs_A, cs_A;
19 int inc_p;
20
21 datatype = FLA_Obj_datatype( A );
22
23 m_A = FLA_Obj_length( A );
24 n_A = FLA_Obj_width( A );
27
29
30
31 switch ( datatype )
32 {
33 case FLA_FLOAT:
34 {
35 float* buff_A = FLA_FLOAT_PTR( A );
36 int* buff_p = FLA_INT_PTR( p );
37
39 n_A,
41 buff_p, inc_p );
42
43 break;
44 }
45
46 case FLA_DOUBLE:
47 {
48 double* buff_A = FLA_DOUBLE_PTR( A );
49 int* buff_p = FLA_INT_PTR( p );
50
52 n_A,
54 buff_p, inc_p );
55
56 break;
57 }
58
59 case FLA_COMPLEX:
60 {
62 int* buff_p = FLA_INT_PTR( p );
63
65 n_A,
67 buff_p, inc_p );
68
69 break;
70 }
71
73 {
75 int* buff_p = FLA_INT_PTR( p );
76
78 n_A,
80 buff_p, inc_p );
81
82 break;
83 }
84 }
85
86 return r_val;
87}
FLA_Error FLA_LU_piv_opz_var5(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:314
FLA_Error FLA_LU_piv_opc_var5(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:239
FLA_Error FLA_LU_piv_ops_var5(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:91
FLA_Error FLA_LU_piv_opd_var5(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition FLA_LU_piv_opt_var5.c:165

References FLA_LU_piv_opc_var5(), FLA_LU_piv_opd_var5(), FLA_LU_piv_ops_var5(), FLA_LU_piv_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opz_var3()

FLA_Error FLA_LU_piv_opz_var3 ( int  m_A,
int  n_A,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
int *buff_p,
int  inc_p 
)
509{
513 int min_m_n = min( m_A, n_A );
514 int i;
515
516 for ( i = 0; i < min_m_n; ++i )
517 {
519 dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
520 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
521 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
522 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
523 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
524 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
525
526 dcomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
527
528 int* p0 = buff_p;
529 int* pi1 = buff_p + i*inc_p;
530
531 int m_ahead = m_A - i - 1;
532 int mn_behind = i;
533
534 /*------------------------------------------------------------*/
535
536 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
538 a1, rs_A, cs_A,
539 0,
540 mn_behind - 1,
541 p0, inc_p );
542
543 // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
547 mn_behind,
548 A00, rs_A, cs_A,
549 a01, rs_A );
550
551 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
553 mn_behind,
554 buff_m1,
555 a10t, cs_A,
556 a01, rs_A,
557 buff_1,
558 alpha11 );
559
560 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
563 m_ahead,
564 mn_behind,
565 buff_m1,
566 A20, rs_A, cs_A,
567 a01, rs_A,
568 buff_1,
569 a21, rs_A );
570
571 // FLA_Merge_2x1( alpha11,
572 // a21, &aB1 );
573
574 // FLA_Amax_external( aB1, pi1 );
575 bl1_zamax( m_ahead + 1,
576 alpha11, rs_A,
577 pi1 );
578
579 // If a null pivot is encountered, return the index.
580 pivot_val =*(alpha11 + *pi1);
581 if ( pivot_val.real == zzero.real &&
582 pivot_val.imag == zzero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
583 else
584 {
585 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
587 alpha11, rs_A, cs_A,
588 0,
589 0,
590 pi1, inc_p );
591
592 // FLA_Inv_scal_external( alpha11, a21 );
594 m_ahead,
595 alpha11,
596 a21, rs_A );
597
598 // FLA_Merge_2x1( a10t,
599 // A20, &AB0 );
600
601 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
603 a10t, rs_A, cs_A,
604 0,
605 0,
606 pi1, inc_p );
607 }
608 /*------------------------------------------------------------*/
609
610 }
611
612 if ( m_A < n_A )
613 {
616
617 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
619 ATR, rs_A, cs_A,
620 0,
621 m_A - 1,
622 buff_p, inc_p );
623
624 // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
625 // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
626 // FLA_ONE, ATL, ATR );
631 m_A,
632 n_A - m_A,
633 buff_1,
634 ATL, rs_A, cs_A,
635 ATR, rs_A, cs_A );
636 }
637
638 return r_val;
639}
FLA_Error FLA_Apply_pivots_ln_opz_var1(int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition FLA_Apply_pivots_ln_opt_var1.c:438
const dcomplex zzero
Definition FLA_Init.c:39
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition bl1_amax.c:46
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition bl1_dots.c:56
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition bl1_invscalv.c:78
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition bl1_trsm.c:369
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition bl1_trsv.c:177
double real
Definition blis_type_defs.h:139
double imag
Definition blis_type_defs.h:139

References bl1_zamax(), bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, FLA_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opz_var4()

FLA_Error FLA_LU_piv_opz_var4 ( int  m_A,
int  n_A,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
int *buff_p,
int  inc_p 
)
470{
474 int min_m_n = min( m_A, n_A );
475 int i, is_null_pivot;
476
477 for ( i = 0; i < min_m_n; ++i )
478 {
480 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
481 dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
482 dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
483 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
484 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
485 dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
486 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
487
488 int* pi1 = buff_p + i*inc_p;
489
490 int m_ahead = m_A - i - 1;
491 int n_ahead = n_A - i - 1;
492 int mn_behind = i;
493
494 /*------------------------------------------------------------*/
495
496 // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
498 mn_behind,
499 buff_m1,
500 a10t, cs_A,
501 a01, rs_A,
502 buff_1,
503 alpha11 );
504
505 // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
508 m_ahead,
509 mn_behind,
510 buff_m1,
511 A20, rs_A, cs_A,
512 a01, rs_A,
513 buff_1,
514 a21, rs_A );
515
516 // FLA_Merge_2x1( alpha11,
517 // a21, &aB1 );
518
519 // FLA_Amax_external( aB1, pi1 );
520 bl1_zamax( m_ahead + 1,
521 alpha11, rs_A,
522 pi1 );
523
524 // If a null pivot is encountered, return the index.
525 pivot_val =*(alpha11 + *pi1);
526
527 is_null_pivot = (pivot_val.real == zzero.real && pivot_val.imag == zzero.imag);
528 if ( is_null_pivot )
529 {
530 r_val = ( r_val == FLA_SUCCESS ? i : r_val );
531 }
532 else
533 {
534 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
536 alpha11, rs_A, cs_A,
537 0,
538 0,
539 pi1, inc_p );
540
541 // FLA_Merge_2x1( a10t,
542 // A20, &AB0 );
543
544 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
546 a10t, rs_A, cs_A,
547 0,
548 0,
549 pi1, inc_p );
550
551 // FLA_Merge_2x1( a12t,
552 // A22, &AB2 );
553
554 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
556 a12t, rs_A, cs_A,
557 0,
558 0,
559 pi1, inc_p );
560 }
561
562 // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
565 mn_behind,
566 n_ahead,
567 buff_m1,
568 A02, rs_A, cs_A,
569 a10t, cs_A,
570 buff_1,
571 a12t, cs_A );
572
573 if ( ! is_null_pivot )
574 {
575 // FLA_Inv_scal_external( alpha11, a21 );
577 m_ahead,
578 alpha11,
579 a21, rs_A );
580 }
581 /*------------------------------------------------------------*/
582
583 }
584
585 return r_val;
586}

References bl1_zamax(), bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, FLA_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_opz_var5()

FLA_Error FLA_LU_piv_opz_var5 ( int  m_A,
int  n_A,
dcomplex *buff_A,
int  rs_A,
int  cs_A,
int *buff_p,
int  inc_p 
)
318{
321 int min_m_n = min( m_A, n_A );
322 int i;
323
324 for ( i = 0; i < min_m_n; ++i )
325 {
327 dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
328 dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
329 dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
330 dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
331 dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
332
333 int* pi1 = buff_p + i*inc_p;
334
335 int m_ahead = m_A - i - 1;
336 int n_ahead = n_A - i - 1;
337
338 /*------------------------------------------------------------*/
339
340 // FLA_Merge_2x1( alpha11,
341 // a21, &aB1 );
342
343 // FLA_Amax_external( aB1, pi1 );
344 bl1_zamax( m_ahead + 1,
345 alpha11, rs_A,
346 pi1 );
347
348 // If a null pivot is encountered, return the index.
349 pivot_val = *(alpha11 + *pi1);
350 if ( pivot_val.real == zzero.real &&
351 pivot_val.imag == zzero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
352 else
353 {
354 // FLA_Merge_1x2( ABL, ABR, &AB );
355
356 // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
358 a10t, rs_A, cs_A,
359 0,
360 0,
361 pi1, inc_p );
362
363 // FLA_Inv_scal_external( alpha11, a21 );
365 m_ahead,
366 alpha11,
367 a21, rs_A );
368 }
369 // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
372 m_ahead,
373 n_ahead,
374 buff_m1,
375 a21, rs_A,
376 a12t, cs_A,
377 A22, rs_A, cs_A );
378
379 /*------------------------------------------------------------*/
380
381 }
382
383 return r_val;
384}
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition bl1_ger.c:194

References bl1_zamax(), bl1_zger(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_unb_var3()

FLA_Error FLA_LU_piv_unb_var3 ( FLA_Obj  A,
FLA_Obj  p 
)
16{
17 FLA_Obj ATL, ATR, A00, a01, A02,
19 A20, a21, A22;
20
21 FLA_Obj AL, AR, A0, a1, A2;
22
23 FLA_Obj pT, p0,
24 pB, pi1,
25 p2;
26
28
29
30 FLA_Part_2x2( A, &ATL, &ATR,
31 &ABL, &ABR, 0, 0, FLA_TL );
32
33 FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
34
35 FLA_Part_2x1( p, &pT,
36 &pB, 0, FLA_TOP );
37
38 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
40
41 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
42 /* ************* */ /* ************************** */
43 &a10t, /**/ &alpha11, &a12t,
44 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
45 1, 1, FLA_BR );
46
47 FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &a1, &A2,
48 1, FLA_RIGHT );
49
51 /* ** */ /* *** */
52 &pi1,
53 pB, &p2, 1, FLA_BOTTOM );
54
55 /*------------------------------------------------------------*/
56
57 // Apply previously computed pivots
59
60 // a01 = trilu( A00 ) \ a01
62
63 // alpha11 = alpha11 - a10t * a01
65
66 // a21 = a21 - A20 * a01
68
69 // aB1 = / alpha11 \
70 // \ a21 /
72 a21, &aB1 );
73
74 // Determine pivot index
76
77 // Apply pivots to current column
79
80 // a21 = a21 / alpha11
82
83 // AB0 = / a10t \
84 // \ A20 /
86 A20, &AB0 );
87
88 // Apply pivots to previous columns
90
91 /*------------------------------------------------------------*/
92
93 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
94 a10t, alpha11, /**/ a12t,
95 /* ************** */ /* ************************ */
96 &ABL, /**/ &ABR, A20, a21, /**/ A22,
97 FLA_TL );
98
99 FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, a1, /**/ A2,
100 FLA_LEFT );
101
103 pi1,
104 /* ** */ /* *** */
105 &pB, p2, FLA_TOP );
106
107 }
108
109 if ( FLA_Obj_width( ATR ) > 0 )
110 {
111 /* Apply pivots to untouched columns */
113
114 /* ATR = trilu( ATL ) \ ATR */
117 FLA_ONE, ATL, ATR );
118 }
119
120 return FLA_SUCCESS;
121}
FLA_Error FLA_Amax_external(FLA_Obj x, FLA_Obj index)
Definition FLA_Amax_external.c:13
FLA_Error FLA_Inv_scal_external(FLA_Obj alpha, FLA_Obj A)
Definition FLA_Inv_scal_external.c:13
FLA_Error FLA_Dots_external(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho)
Definition FLA_Dots_external.c:13
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition FLA_Gemv_external.c:13
FLA_Error FLA_Trsv_external(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x)
Definition FLA_Trsv_external.c:13
FLA_Error FLA_Apply_pivots(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
Definition FLA_Apply_pivots.c:15

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), FLA_Trsv_external(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_unb_var3b()

FLA_Error FLA_LU_piv_unb_var3b ( FLA_Obj  A,
FLA_Obj  p 
)
16{
17 FLA_Obj ATL, ATR, A00, a01, A02,
19 A20, a21, A22;
20
21 FLA_Obj pT, p0,
22 pB, pi1,
23 p2;
24
26
27
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30
31 FLA_Part_2x1( p, &pT,
32 &pB, 0, FLA_TOP );
33
34 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
38 /* ************* */ /* ************************** */
39 &a10t, /**/ &alpha11, &a12t,
40 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
41 1, 1, FLA_BR );
42
44 /* ** */ /* *** */
45 &pi1,
46 pB, &p2, 1, FLA_BOTTOM );
47
48 /*------------------------------------------------------------*/
49
50 // a01 = trilu( A00 ) \ a01
52
53 // alpha11 = alpha11 - a10t * a01
55
56 // a21 = a21 - A20 * a01
58
59 // aB1 = / alpha11 \
60 // \ a21 /
62 a21, &aB1 );
63
64 // Determine pivot index
66
67 // Apply pivots to current column
69
70 // AB0 = / a10t \
71 // \ A20 /
73 A20, &AB0 );
74
75 // AB2 = / a12t \
76 // \ A22 /
78 A22, &AB2 );
79
80 // Apply pivots to remaining columns
83
84 // a21 = a21 / alpha11
86
87 /*------------------------------------------------------------*/
88
89 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
90 a10t, alpha11, /**/ a12t,
91 /* ************** */ /* ************************ */
92 &ABL, /**/ &ABR, A20, a21, /**/ A22,
93 FLA_TL );
94
96 pi1,
97 /* ** */ /* *** */
98 &pB, p2, FLA_TOP );
99
100 }
101
102 if ( FLA_Obj_width( ATR ) > 0 )
103 {
104 /* ATR = trilu( ATL ) \ ATR */
107 FLA_ONE, ATL, ATR );
108 }
109
110 return FLA_SUCCESS;
111}

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), FLA_Trsv_external(), and i.

◆ FLA_LU_piv_unb_var4()

FLA_Error FLA_LU_piv_unb_var4 ( FLA_Obj  A,
FLA_Obj  p 
)
16{
17 FLA_Obj ATL, ATR, A00, a01, A02,
19 A20, a21, A22;
20
21 FLA_Obj pT, p0,
22 pB, pi1,
23 p2;
24
26
27
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30
31 FLA_Part_2x1( p, &pT,
32 &pB, 0, FLA_TOP );
33
34 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
38 /* ************* */ /* ************************** */
39 &a10t, /**/ &alpha11, &a12t,
40 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
41 1, 1, FLA_BR );
42
44 /* ** */ /* *** */
45 &pi1,
46 pB, &p2, 1, FLA_BOTTOM );
47
48 /*------------------------------------------------------------*/
49
50 // alpha11 = alpha11 - a10t * a01
52
53 // a21 = a21 - A20 * a01
55
56 // aB1 = / alpha11 \
57 // \ a21 /
59 a21, &aB1 );
60
61 // Determine pivot index
63
64 // Apply pivots to current column
66
67 // AB0 = / a10t \
68 // \ A20 /
70 A20, &AB0 );
71
72 // AB2 = / a12t \
73 // \ A22 /
75 A22, &AB2 );
76
77 // Apply pivots to remaining columns
80
81 // a21 = a21 / alpha11
83
84 // a12t = a12t - a10t * A02
86
87 /*------------------------------------------------------------*/
88
89 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
90 a10t, alpha11, /**/ a12t,
91 /* ************** */ /* ************************ */
92 &ABL, /**/ &ABR, A20, a21, /**/ A22,
93 FLA_TL );
94
96 pi1,
97 /* ** */ /* *** */
98 &pB, p2, FLA_TOP );
99
100 }
101
102 return FLA_SUCCESS;
103}

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_unb_var5()

FLA_Error FLA_LU_piv_unb_var5 ( FLA_Obj  A,
FLA_Obj  p 
)
16{
17 FLA_Obj ATL, ATR, A00, a01, A02,
19 A20, a21, A22;
20
21 FLA_Obj pT, p0,
22 pB, pi1,
23 p2;
24
26
27
28 FLA_Part_2x2( A, &ATL, &ATR,
29 &ABL, &ABR, 0, 0, FLA_TL );
30
31 FLA_Part_2x1( p, &pT,
32 &pB, 0, FLA_TOP );
33
34 while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
36
37 FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
38 /* ************* */ /* ************************** */
39 &a10t, /**/ &alpha11, &a12t,
40 ABL, /**/ ABR, &A20, /**/ &a21, &A22,
41 1, 1, FLA_BR );
42
44 /* ** */ /* *** */
45 &pi1,
46 pB, &p2, 1, FLA_BOTTOM );
47
48 /*------------------------------------------------------------*/
49
50 // aB1 = / alpha11 \
51 // \ a21 /
53 a21, &aB1 );
54
55 // Determine pivot index
57
58 // Apply pivots to current column
60
61 // a21 = a21 / alpha11
63
64 // AB0 = / a10t \
65 // \ A20 /
67 A20, &AB0 );
68
69 // Apply pivots to previous columns
71
72 // AB2 = / a12t \
73 // \ A22 /
75 A22, &AB2 );
76
77 // Apply pivots to remaining columns
79
80 // A22 = A22 - a21 * a12t
82
83 /*------------------------------------------------------------*/
84
85 FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
86 a10t, alpha11, /**/ a12t,
87 /* ************** */ /* ************************ */
88 &ABL, /**/ &ABR, A20, a21, /**/ A22,
89 FLA_TL );
90
92 pi1,
93 /* ** */ /* *** */
94 &pB, p2, FLA_TOP );
95
96 }
97
98 return FLA_SUCCESS;
99}
FLA_Error FLA_Ger_external(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition FLA_Ger_external.c:13

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Ger_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and i.

Referenced by FLA_LU_piv_internal().