libflame revision_anchor
Functions
FLA_Bsvd_v_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Bsvd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 

Function Documentation

◆ FLA_Bsvd_v_opc_var1()

FLA_Error FLA_Bsvd_v_opc_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
686{
687 scomplex one = bl1_c1();
688 float rzero = bl1_s0();
689
690 int maxitr = 6;
691
692 float eps;
693 float tolmul;
694 float tol;
695 float thresh;
696
697 scomplex* G;
698 scomplex* H;
699 float* d1;
700 float* e1;
701 int r_val;
702 int done;
703 int m_GH_sweep_max;
704 int ij_begin;
705 int ijTL, ijBR;
706 int m_A11;
707 int n_iter_perf;
708 int n_UV_apply;
710 int n_deflations;
711 int n_iter_prev;
713
714 // Compute some convergence constants.
716 tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
718 tolmul,
719 maxitr,
720 buff_d, inc_d,
721 buff_e, inc_e,
722 &tol,
723 &thresh );
724
725 // Initialize our completion flag.
726 done = FALSE;
727
728 // Initialize a counter that holds the maximum number of rows of G
729 // and H that we would need to initialize for the next sweep.
731
732 // Initialize a counter for the total number of iterations performed.
733 n_iter_prev = 0;
734
735 // Iterate until the matrix has completely deflated.
736 for ( total_deflations = 0; done != TRUE; )
737 {
738
739 // Initialize G and H to contain only identity rotations.
741 n_GH,
742 &one,
743 buff_G, rs_G, cs_G );
745 n_GH,
746 &one,
747 buff_H, rs_H, cs_H );
748
749 // Keep track of the maximum number of iterations performed in the
750 // current sweep. This is used when applying the sweep's Givens
751 // rotations.
753
754 // Perform a sweep: Move through the matrix and perform a bidiagonal
755 // SVD on each non-zero submatrix that is encountered. During the
756 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
757 for ( ij_begin = 0; ij_begin < min_m_n; )
758 {
759 // Search for the first submatrix along the diagonal that is
760 // bounded by zeroes (or endpoints of the matrix). If no
761 // submatrix is found (ie: if the entire superdiagonal is zero
762 // then FLA_FAILURE is returned. This function also inspects
763 // superdiagonal elements for proximity to zero. If a given
764 // element is close enough to zero, then it is deemed
765 // converged and manually set to zero.
767 ij_begin,
768 buff_d, inc_d,
769 buff_e, inc_e,
770 &ijTL,
771 &ijBR );
772
773 // Verify that a submatrix was found. If one was not found,
774 // then we are done with the current sweep. Furthermore, if
775 // a submatrix was not found AND we began our search at the
776 // beginning of the matrix (ie: ij_begin == 0), then the
777 // matrix has completely deflated and so we are done with
778 // Francis step iteration.
779 if ( r_val == FLA_FAILURE )
780 {
781 if ( ij_begin == 0 )
782 {
783 done = TRUE;
784 }
785
786 // Break out of the current sweep so we can apply the last
787 // remaining Givens rotations.
788 break;
789 }
790
791 // If we got this far, then:
792 // (a) ijTL refers to the index of the first non-zero
793 // superdiagonal along the diagonal, and
794 // (b) ijBR refers to either:
795 // - the first zero element that occurs after ijTL, or
796 // - the the last diagonal element.
797 // Note that ijTL and ijBR also correspond to the first and
798 // last diagonal elements of the submatrix of interest. Thus,
799 // we may compute the dimension of this submatrix as:
800 m_A11 = ijBR - ijTL + 1;
801
802 // Adjust ij_begin, which gets us ready for the next submatrix
803 // search in the current sweep.
804 ij_begin = ijBR + 1;
805
806 // Index to the submatrices upon which we will operate.
807 d1 = buff_d + ijTL * inc_d;
808 e1 = buff_e + ijTL * inc_e;
809 G = buff_G + ijTL * rs_G;
810 H = buff_H + ijTL * rs_H;
811
812 // Search for a batch of singular values, recursing on deflated
813 // subproblems whenever a split occurs. Iteration continues as
814 // long as
815 // (a) there is still matrix left to operate on, and
816 // (b) the number of iterations performed in this batch is
817 // less than n_G.
818 // If/when either of the two above conditions fails to hold,
819 // the function returns.
821 n_GH,
822 ijTL,
823 tol,
824 thresh,
825 d1, inc_d,
826 e1, inc_e,
827 G, rs_G, cs_G,
828 H, rs_H, cs_H,
829 &n_iter_perf );
830
831 // Record the number of deflations that were observed.
833
834 // Update the maximum number of iterations performed in the
835 // current sweep.
837
838 // Store the most recent value of ijBR in m_G_sweep_max.
839 // When the sweep is done, this value will contain the minimum
840 // number of rows of G and H we can apply and safely include all
841 // non-identity rotations that were computed during the
842 // singular value searches.
844
845 // Make sure we haven't exceeded our maximum iteration count.
846 if ( n_iter_prev >= n_iter_max * min_m_n )
847 {
848 FLA_Abort();
849 //return FLA_FAILURE;
850 }
851 }
852
853 // The sweep is complete. Now we must apply the Givens rotations
854 // that were accumulated during the sweep.
855
856 // Recall that the number of columns of U and V to which we apply
857 // rotations is one more than the number of rotations.
859
860 // Apply the Givens rotations. Note that we only apply k sets of
861 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
862 // apply to n_UV_apply columns of U and V since this is the most we
863 // need to touch given the most recent value stored to m_GH_sweep_max.
864 //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
866 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
867 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
868 m_U,
870 buff_G, rs_G, cs_G,
871 buff_U, rs_U, cs_U,
872 b_alg );
873 //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
875 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
876 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
877 m_V,
879 buff_H, rs_H, cs_H,
880 buff_V, rs_V, cs_V,
881 b_alg );
882
883 // Increment the total number of iterations previously performed.
885 }
886
887 // Make all the singular values positive.
888 {
889 int i;
890 float minus_one = bl1_sm1();
891
892 for ( i = 0; i < min_m_n; ++i )
893 {
894 if ( buff_d[ (i )*inc_d ] < rzero )
895 {
896 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
897
898 // Scale the right singular vectors.
900 m_V,
901 &minus_one,
902 buff_V + (i )*cs_V, rs_V );
903 }
904 }
905 }
906
907 return FLA_SUCCESS;
908}
FLA_Error FLA_Apply_G_rf_blc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:157
FLA_Error FLA_Bsvd_compute_tol_thresh_ops(int n_A, float tolmul, float maxitr, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
Definition FLA_Bsvd_compute_tol_thresh.c:78
FLA_Error FLA_Bsvd_find_submatrix_ops(int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition FLA_Bsvd_find_submatrix.c:14
FLA_Error FLA_Bsvd_iteracc_v_ops_var1(int m_A, int n_GH, int ijTL, float tol, float thresh, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition FLA_Bsvd_iteracc_v_opt_var1.c:13
void FLA_Abort(void)
Definition FLA_Error.c:248
float FLA_Mach_params_ops(FLA_Machval machval)
Definition FLA_Mach_params.c:47
int i
Definition bl1_axmyv2.c:145
void bl1_csscalv(conj1_t conj, int n, float *alpha, scomplex *x, int incx)
Definition bl1_scalv.c:35
float bl1_s0(void)
Definition bl1_constants.c:111
scomplex bl1_c1(void)
Definition bl1_constants.c:61
float bl1_sm1(void)
Definition bl1_constants.c:175
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:61
@ BLIS1_NO_CONJUGATE
Definition blis_type_defs.h:81
Definition blis_type_defs.h:133

References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opd_var1()

FLA_Error FLA_Bsvd_v_opd_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double buff_U,
int  rs_U,
int  cs_U,
double buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
409{
410 dcomplex one = bl1_z1();
411 double rzero = bl1_d0();
412
413 int maxitr = 6;
414
415 double eps;
416 double tolmul;
417 double tol;
418 double thresh;
419
420 dcomplex* G;
421 dcomplex* H;
422 double* d1;
423 double* e1;
424 int r_val;
425 int done;
426 int m_GH_sweep_max;
427 int ij_begin;
428 int ijTL, ijBR;
429 int m_A11;
430 int n_iter_perf;
431 int n_UV_apply;
433 int n_deflations;
434 int n_iter_prev;
436
437 // Compute some convergence constants.
439 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
441 tolmul,
442 maxitr,
443 buff_d, inc_d,
444 buff_e, inc_e,
445 &tol,
446 &thresh );
447#ifdef PRINTF
448 printf( "FLA_Bsvd_v_opd_var1: tolmul = %12.6e\n", tolmul );
449 printf( "FLA_Bsvd_v_opd_var1: tol = %12.6e\n", tol );
450 printf( "FLA_Bsvd_v_opd_var1: thresh = %12.6e\n", thresh );
451#endif
452
453 // Initialize our completion flag.
454 done = FALSE;
455
456 // Initialize a counter that holds the maximum number of rows of G
457 // and H that we would need to initialize for the next sweep.
459
460 // Initialize a counter for the total number of iterations performed.
461 n_iter_prev = 0;
462
463 // Iterate until the matrix has completely deflated.
464 for ( total_deflations = 0; done != TRUE; )
465 {
466
467 // Initialize G and H to contain only identity rotations.
469 n_GH,
470 &one,
471 buff_G, rs_G, cs_G );
473 n_GH,
474 &one,
475 buff_H, rs_H, cs_H );
476
477 // Keep track of the maximum number of iterations performed in the
478 // current sweep. This is used when applying the sweep's Givens
479 // rotations.
481
482 // Perform a sweep: Move through the matrix and perform a bidiagonal
483 // SVD on each non-zero submatrix that is encountered. During the
484 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
485 for ( ij_begin = 0; ij_begin < min_m_n; )
486 {
487
488#ifdef PRINTF
489 if ( ij_begin == 0 )
490 printf( "FLA_Bsvd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
491#endif
492
493 // Search for the first submatrix along the diagonal that is
494 // bounded by zeroes (or endpoints of the matrix). If no
495 // submatrix is found (ie: if the entire superdiagonal is zero
496 // then FLA_FAILURE is returned. This function also inspects
497 // superdiagonal elements for proximity to zero. If a given
498 // element is close enough to zero, then it is deemed
499 // converged and manually set to zero.
501 ij_begin,
502 buff_d, inc_d,
503 buff_e, inc_e,
504 &ijTL,
505 &ijBR );
506
507 // Verify that a submatrix was found. If one was not found,
508 // then we are done with the current sweep. Furthermore, if
509 // a submatrix was not found AND we began our search at the
510 // beginning of the matrix (ie: ij_begin == 0), then the
511 // matrix has completely deflated and so we are done with
512 // Francis step iteration.
513 if ( r_val == FLA_FAILURE )
514 {
515 if ( ij_begin == 0 )
516 {
517#ifdef PRINTF
518 printf( "FLA_Bsvd_v_opd_var1: superdiagonal is completely zero.\n" );
519 printf( "FLA_Bsvd_v_opd_var1: Francis iteration is done!\n" );
520#endif
521 done = TRUE;
522 }
523
524 // Break out of the current sweep so we can apply the last
525 // remaining Givens rotations.
526 break;
527 }
528
529 // If we got this far, then:
530 // (a) ijTL refers to the index of the first non-zero
531 // superdiagonal along the diagonal, and
532 // (b) ijBR refers to either:
533 // - the first zero element that occurs after ijTL, or
534 // - the the last diagonal element.
535 // Note that ijTL and ijBR also correspond to the first and
536 // last diagonal elements of the submatrix of interest. Thus,
537 // we may compute the dimension of this submatrix as:
538 m_A11 = ijBR - ijTL + 1;
539
540#ifdef PRINTF
541 printf( "FLA_Bsvd_v_opd_var1: ij_begin = %d\n", ij_begin );
542 printf( "FLA_Bsvd_v_opd_var1: ijTL = %d\n", ijTL );
543 printf( "FLA_Bsvd_v_opd_var1: ijBR = %d\n", ijBR );
544 printf( "FLA_Bsvd_v_opd_var1: m_A11 = %d\n", m_A11 );
545#endif
546
547 // Adjust ij_begin, which gets us ready for the next submatrix
548 // search in the current sweep.
549 ij_begin = ijBR + 1;
550
551 // Index to the submatrices upon which we will operate.
552 d1 = buff_d + ijTL * inc_d;
553 e1 = buff_e + ijTL * inc_e;
554 G = buff_G + ijTL * rs_G;
555 H = buff_H + ijTL * rs_H;
556
557 // Search for a batch of singular values, recursing on deflated
558 // subproblems whenever a split occurs. Iteration continues as
559 // long as
560 // (a) there is still matrix left to operate on, and
561 // (b) the number of iterations performed in this batch is
562 // less than n_G.
563 // If/when either of the two above conditions fails to hold,
564 // the function returns.
566 n_GH,
567 ijTL,
568 tol,
569 thresh,
570 d1, inc_d,
571 e1, inc_e,
572 G, rs_G, cs_G,
573 H, rs_H, cs_H,
574 &n_iter_perf );
575
576 // Record the number of deflations that were observed.
578
579 // Update the maximum number of iterations performed in the
580 // current sweep.
582
583#ifdef PRINTF
584 printf( "FLA_Bsvd_v_opd_var1: deflations observed = %d\n", n_deflations );
585 printf( "FLA_Bsvd_v_opd_var1: total deflations observed = %d\n", total_deflations );
586 printf( "FLA_Bsvd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
587#endif
588
589 // Store the most recent value of ijBR in m_G_sweep_max.
590 // When the sweep is done, this value will contain the minimum
591 // number of rows of G and H we can apply and safely include all
592 // non-identity rotations that were computed during the
593 // singular value searches.
595
596 // Make sure we haven't exceeded our maximum iteration count.
597 if ( n_iter_prev >= n_iter_max * min_m_n )
598 {
599#ifdef PRINTF
600 printf( "FLA_Bsvd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
601#endif
602 FLA_Abort();
603 //return FLA_FAILURE;
604 }
605 }
606
607 // The sweep is complete. Now we must apply the Givens rotations
608 // that were accumulated during the sweep.
609
610 // Recall that the number of columns of U and V to which we apply
611 // rotations is one more than the number of rotations.
613
614#ifdef PRINTF
615 printf( "FLA_Bsvd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
616 printf( "FLA_Bsvd_v_opd_var1: m_U = %d\n", m_U );
617 printf( "FLA_Bsvd_v_opd_var1: m_V = %d\n", m_V );
618 printf( "FLA_Bsvd_v_opd_var1: napp= %d\n", n_UV_apply );
619#endif
620 // Apply the Givens rotations. Note that we only apply k sets of
621 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
622 // apply to n_UV_apply columns of U and V since this is the most we
623 // need to touch given the most recent value stored to m_GH_sweep_max.
624 //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
626 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
627 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
628 m_U,
630 buff_G, rs_G, cs_G,
631 buff_U, rs_U, cs_U,
632 b_alg );
633 //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
635 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
636 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
637 m_V,
639 buff_H, rs_H, cs_H,
640 buff_V, rs_V, cs_V,
641 b_alg );
642
643 // Increment the total number of iterations previously performed.
645
646#ifdef PRINTF
647 printf( "FLA_Bsvd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
648#endif
649 }
650
651 // Make all the singular values positive.
652 {
653 int i;
654 double minus_one = bl1_dm1();
655
656 for ( i = 0; i < min_m_n; ++i )
657 {
658 if ( buff_d[ (i )*inc_d ] < rzero )
659 {
660 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
661
662 // Scale the right singular vectors.
664 m_V,
665 &minus_one,
666 buff_V + (i )*cs_V, rs_V );
667 }
668 }
669 }
670
671 return n_iter_prev;
672}
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition FLA_Bsvd_compute_tol_thresh.c:137
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition FLA_Bsvd_find_submatrix.c:73
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition FLA_Bsvd_iteracc_v_opt_var1.c:176
double FLA_Mach_params_opd(FLA_Machval machval)
Definition FLA_Mach_params.c:74
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition bl1_scalv.c:24
double bl1_dm1(void)
Definition bl1_constants.c:182
dcomplex bl1_z1(void)
Definition bl1_constants.c:69
double bl1_d0(void)
Definition bl1_constants.c:118
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition bl1_setm.c:78
Definition blis_type_defs.h:138

References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_ops_var1()

FLA_Error FLA_Bsvd_v_ops_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float buff_d,
int  inc_d,
float buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float buff_U,
int  rs_U,
int  cs_U,
float buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
169{
170 scomplex one = bl1_c1();
171 float rzero = bl1_s0();
172
173 int maxitr = 6;
174
175 float eps;
176 float tolmul;
177 float tol;
178 float thresh;
179
180 scomplex* G;
181 scomplex* H;
182 float* d1;
183 float* e1;
184 int r_val;
185 int done;
186 int m_GH_sweep_max;
187 int ij_begin;
188 int ijTL, ijBR;
189 int m_A11;
190 int n_iter_perf;
191 int n_UV_apply;
193 int n_deflations;
194 int n_iter_prev;
196
197 // Compute some convergence constants.
199 tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
201 tolmul,
202 maxitr,
203 buff_d, inc_d,
204 buff_e, inc_e,
205 &tol,
206 &thresh );
207
208 // Initialize our completion flag.
209 done = FALSE;
210
211 // Initialize a counter that holds the maximum number of rows of G
212 // and H that we would need to initialize for the next sweep.
214
215 // Initialize a counter for the total number of iterations performed.
216 n_iter_prev = 0;
217
218 // Iterate until the matrix has completely deflated.
219 for ( total_deflations = 0; done != TRUE; )
220 {
221
222 // Initialize G and H to contain only identity rotations.
224 n_GH,
225 &one,
226 buff_G, rs_G, cs_G );
228 n_GH,
229 &one,
230 buff_H, rs_H, cs_H );
231
232 // Keep track of the maximum number of iterations performed in the
233 // current sweep. This is used when applying the sweep's Givens
234 // rotations.
236
237 // Perform a sweep: Move through the matrix and perform a bidiagonal
238 // SVD on each non-zero submatrix that is encountered. During the
239 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
240 for ( ij_begin = 0; ij_begin < min_m_n; )
241 {
242
243 // Search for the first submatrix along the diagonal that is
244 // bounded by zeroes (or endpoints of the matrix). If no
245 // submatrix is found (ie: if the entire superdiagonal is zero
246 // then FLA_FAILURE is returned. This function also inspects
247 // superdiagonal elements for proximity to zero. If a given
248 // element is close enough to zero, then it is deemed
249 // converged and manually set to zero.
251 ij_begin,
252 buff_d, inc_d,
253 buff_e, inc_e,
254 &ijTL,
255 &ijBR );
256
257 // Verify that a submatrix was found. If one was not found,
258 // then we are done with the current sweep. Furthermore, if
259 // a submatrix was not found AND we began our search at the
260 // beginning of the matrix (ie: ij_begin == 0), then the
261 // matrix has completely deflated and so we are done with
262 // Francis step iteration.
263 if ( r_val == FLA_FAILURE )
264 {
265 if ( ij_begin == 0 )
266 {
267 done = TRUE;
268 }
269
270 // Break out of the current sweep so we can apply the last
271 // remaining Givens rotations.
272 break;
273 }
274
275 // If we got this far, then:
276 // (a) ijTL refers to the index of the first non-zero
277 // superdiagonal along the diagonal, and
278 // (b) ijBR refers to either:
279 // - the first zero element that occurs after ijTL, or
280 // - the the last diagonal element.
281 // Note that ijTL and ijBR also correspond to the first and
282 // last diagonal elements of the submatrix of interest. Thus,
283 // we may compute the dimension of this submatrix as:
284 m_A11 = ijBR - ijTL + 1;
285
286 // Adjust ij_begin, which gets us ready for the next submatrix
287 // search in the current sweep.
288 ij_begin = ijBR + 1;
289
290 // Index to the submatrices upon which we will operate.
291 d1 = buff_d + ijTL * inc_d;
292 e1 = buff_e + ijTL * inc_e;
293 G = buff_G + ijTL * rs_G;
294 H = buff_H + ijTL * rs_H;
295
296 // Search for a batch of singular values, recursing on deflated
297 // subproblems whenever a split occurs. Iteration continues as
298 // long as
299 // (a) there is still matrix left to operate on, and
300 // (b) the number of iterations performed in this batch is
301 // less than n_G.
302 // If/when either of the two above conditions fails to hold,
303 // the function returns.
305 n_GH,
306 ijTL,
307 tol,
308 thresh,
309 d1, inc_d,
310 e1, inc_e,
311 G, rs_G, cs_G,
312 H, rs_H, cs_H,
313 &n_iter_perf );
314
315 // Record the number of deflations that were observed.
317
318 // Update the maximum number of iterations performed in the
319 // current sweep.
321
322 // Store the most recent value of ijBR in m_G_sweep_max.
323 // When the sweep is done, this value will contain the minimum
324 // number of rows of G and H we can apply and safely include all
325 // non-identity rotations that were computed during the
326 // singular value searches.
328
329 // Make sure we haven't exceeded our maximum iteration count.
330 if ( n_iter_prev >= n_iter_max * min_m_n )
331 {
332 FLA_Abort();
333 //return FLA_FAILURE;
334 }
335 }
336
337 // The sweep is complete. Now we must apply the Givens rotations
338 // that were accumulated during the sweep.
339
340 // Recall that the number of columns of U and V to which we apply
341 // rotations is one more than the number of rotations.
343
344 // Apply the Givens rotations. Note that we only apply k sets of
345 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
346 // apply to n_UV_apply columns of U and V since this is the most we
347 // need to touch given the most recent value stored to m_GH_sweep_max.
348 //FLA_Apply_G_rf_bls_var5( n_iter_perf_sweep_max,
350 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
351 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
352 m_U,
354 buff_G, rs_G, cs_G,
355 buff_U, rs_U, cs_U,
356 b_alg );
357 //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
359 //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
360 //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
361 m_V,
363 buff_H, rs_H, cs_H,
364 buff_V, rs_V, cs_V,
365 b_alg );
366
367 // Increment the total number of iterations previously performed.
369
370 }
371
372 // Make all the singular values positive.
373 {
374 int i;
375 float minus_one = bl1_sm1();
376
377 for ( i = 0; i < min_m_n; ++i )
378 {
379 if ( buff_d[ (i )*inc_d ] < rzero )
380 {
381 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
382
383 // Scale the right singular vectors.
385 m_V,
386 &minus_one,
387 buff_V + (i )*cs_V, rs_V );
388 }
389 }
390 }
391
392 return n_iter_prev;
393}
FLA_Error FLA_Apply_G_rf_bls_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:99
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition bl1_scalv.c:13

References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opt_var1()

FLA_Error FLA_Bsvd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
16{
18 FLA_Datatype datatype;
19 int m_U, m_V, n_GH;
20 int inc_d;
21 int inc_e;
22 int rs_G, cs_G;
23 int rs_H, cs_H;
24 int rs_U, cs_U;
25 int rs_V, cs_V;
26
27 datatype = FLA_Obj_datatype( U );
28
29 m_U = FLA_Obj_length( U );
30 m_V = FLA_Obj_length( V );
31 n_GH = FLA_Obj_width( G );
32
35
38
41
44
47
48
49 switch ( datatype )
50 {
51 case FLA_FLOAT:
52 {
53 float* buff_d = FLA_FLOAT_PTR( d );
54 float* buff_e = FLA_FLOAT_PTR( e );
57 float* buff_U = FLA_FLOAT_PTR( U );
58 float* buff_V = FLA_FLOAT_PTR( V );
59
61 m_U,
62 m_V,
63 n_GH,
71 b_alg );
72
73 break;
74 }
75
76 case FLA_DOUBLE:
77 {
78 double* buff_d = FLA_DOUBLE_PTR( d );
79 double* buff_e = FLA_DOUBLE_PTR( e );
82 double* buff_U = FLA_DOUBLE_PTR( U );
83 double* buff_V = FLA_DOUBLE_PTR( V );
84
86 m_U,
87 m_V,
88 n_GH,
96 b_alg );
97
98 break;
99 }
100
101 case FLA_COMPLEX:
102 {
103 float* buff_d = FLA_FLOAT_PTR( d );
104 float* buff_e = FLA_FLOAT_PTR( e );
109
111 m_U,
112 m_V,
113 n_GH,
115 buff_d, inc_d,
116 buff_e, inc_e,
117 buff_G, rs_G, cs_G,
118 buff_H, rs_H, cs_H,
119 buff_U, rs_U, cs_U,
120 buff_V, rs_V, cs_V,
121 b_alg );
122
123 break;
124 }
125
127 {
128 double* buff_d = FLA_DOUBLE_PTR( d );
129 double* buff_e = FLA_DOUBLE_PTR( e );
134
136 m_U,
137 m_V,
138 n_GH,
140 buff_d, inc_d,
141 buff_e, inc_e,
142 buff_G, rs_G, cs_G,
143 buff_H, rs_H, cs_H,
144 buff_U, rs_U, cs_U,
145 buff_V, rs_V, cs_V,
146 b_alg );
147
148 break;
149 }
150 }
151
152 return r_val;
153}
FLA_Error FLA_Bsvd_v_opd_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:397
FLA_Error FLA_Bsvd_v_opc_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:674
FLA_Error FLA_Bsvd_v_opz_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:912
FLA_Error FLA_Bsvd_v_ops_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition FLA_Bsvd_v_opt_var1.c:157
dim_t FLA_Obj_width(FLA_Obj obj)
Definition FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition FLA_Query.c:13
int FLA_Error
Definition FLA_type_defs.h:47
int FLA_Datatype
Definition FLA_type_defs.h:49

References FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_ops_var1(), FLA_Bsvd_v_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), and i.

Referenced by FLA_Svd_uv_unb_var1().

◆ FLA_Bsvd_v_opz_var1()

FLA_Error FLA_Bsvd_v_opz_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double buff_d,
int  inc_d,
double buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
924{
925 dcomplex one = bl1_z1();
926 double rzero = bl1_d0();
927
928 int maxitr = 6;
929
930 double eps;
931 double tolmul;
932 double tol;
933 double thresh;
934
935 dcomplex* G;
936 dcomplex* H;
937 double* d1;
938 double* e1;
939 int r_val;
940 int done;
941 int m_GH_sweep_max;
942 int ij_begin;
943 int ijTL, ijBR;
944 int m_A11;
945 int n_iter_perf;
946 int n_UV_apply;
948 int n_deflations;
949 int n_iter_prev;
951
952 // Compute some convergence constants.
954 tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
956 tolmul,
957 maxitr,
958 buff_d, inc_d,
959 buff_e, inc_e,
960 &tol,
961 &thresh );
962#ifdef PRINTF
963 printf( "FLA_Bsvd_v_opz_var1: tolmul = %12.6e\n", tolmul );
964 printf( "FLA_Bsvd_v_opz_var1: tol = %12.6e\n", tol );
965 printf( "FLA_Bsvd_v_opz_var1: thresh = %12.6e\n", thresh );
966#endif
967
968 // Initialize our completion flag.
969 done = FALSE;
970
971 // Initialize a counter that holds the maximum number of rows of G
972 // and H that we would need to initialize for the next sweep.
974
975 // Initialize a counter for the total number of iterations performed.
976 n_iter_prev = 0;
977
978 // Iterate until the matrix has completely deflated.
979 for ( total_deflations = 0; done != TRUE; )
980 {
981
982 // Initialize G and H to contain only identity rotations.
984 n_GH,
985 &one,
986 buff_G, rs_G, cs_G );
988 n_GH,
989 &one,
990 buff_H, rs_H, cs_H );
991
992 // Keep track of the maximum number of iterations performed in the
993 // current sweep. This is used when applying the sweep's Givens
994 // rotations.
996
997 // Perform a sweep: Move through the matrix and perform a bidiagonal
998 // SVD on each non-zero submatrix that is encountered. During the
999 // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
1000 for ( ij_begin = 0; ij_begin < min_m_n; )
1001 {
1002
1003#ifdef PRINTF
1004 if ( ij_begin == 0 )
1005 printf( "FLA_Bsvd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
1006#endif
1007
1008 // Search for the first submatrix along the diagonal that is
1009 // bounded by zeroes (or endpoints of the matrix). If no
1010 // submatrix is found (ie: if the entire superdiagonal is zero
1011 // then FLA_FAILURE is returned. This function also inspects
1012 // superdiagonal elements for proximity to zero. If a given
1013 // element is close enough to zero, then it is deemed
1014 // converged and manually set to zero.
1016 ij_begin,
1017 buff_d, inc_d,
1018 buff_e, inc_e,
1019 &ijTL,
1020 &ijBR );
1021
1022 // Verify that a submatrix was found. If one was not found,
1023 // then we are done with the current sweep. Furthermore, if
1024 // a submatrix was not found AND we began our search at the
1025 // beginning of the matrix (ie: ij_begin == 0), then the
1026 // matrix has completely deflated and so we are done with
1027 // Francis step iteration.
1028 if ( r_val == FLA_FAILURE )
1029 {
1030 if ( ij_begin == 0 )
1031 {
1032#ifdef PRINTF
1033 printf( "FLA_Bsvd_v_opz_var1: superdiagonal is completely zero.\n" );
1034 printf( "FLA_Bsvd_v_opz_var1: Francis iteration is done!\n" );
1035#endif
1036 done = TRUE;
1037 }
1038
1039 // Break out of the current sweep so we can apply the last
1040 // remaining Givens rotations.
1041 break;
1042 }
1043
1044 // If we got this far, then:
1045 // (a) ijTL refers to the index of the first non-zero
1046 // superdiagonal along the diagonal, and
1047 // (b) ijBR refers to either:
1048 // - the first zero element that occurs after ijTL, or
1049 // - the the last diagonal element.
1050 // Note that ijTL and ijBR also correspond to the first and
1051 // last diagonal elements of the submatrix of interest. Thus,
1052 // we may compute the dimension of this submatrix as:
1053 m_A11 = ijBR - ijTL + 1;
1054
1055#ifdef PRINTF
1056 printf( "FLA_Bsvd_v_opz_var1: ij_begin = %d\n", ij_begin );
1057 printf( "FLA_Bsvd_v_opz_var1: ijTL = %d\n", ijTL );
1058 printf( "FLA_Bsvd_v_opz_var1: ijBR = %d\n", ijBR );
1059 printf( "FLA_Bsvd_v_opz_var1: m_A11 = %d\n", m_A11 );
1060#endif
1061
1062 // Adjust ij_begin, which gets us ready for the next submatrix
1063 // search in the current sweep.
1064 ij_begin = ijBR + 1;
1065
1066 // Index to the submatrices upon which we will operate.
1067 d1 = buff_d + ijTL * inc_d;
1068 e1 = buff_e + ijTL * inc_e;
1069 G = buff_G + ijTL * rs_G;
1070 H = buff_H + ijTL * rs_H;
1071
1072 // Search for a batch of singular values, recursing on deflated
1073 // subproblems whenever a split occurs. Iteration continues as
1074 // long as
1075 // (a) there is still matrix left to operate on, and
1076 // (b) the number of iterations performed in this batch is
1077 // less than n_G.
1078 // If/when either of the two above conditions fails to hold,
1079 // the function returns.
1081 n_GH,
1082 ijTL,
1083 tol,
1084 thresh,
1085 d1, inc_d,
1086 e1, inc_e,
1087 G, rs_G, cs_G,
1088 H, rs_H, cs_H,
1089 &n_iter_perf );
1090
1091 // Record the number of deflations that were observed.
1093
1094 // Update the maximum number of iterations performed in the
1095 // current sweep.
1097
1098#ifdef PRINTF
1099 printf( "FLA_Bsvd_v_opz_var1: deflations observed = %d\n", n_deflations );
1100 printf( "FLA_Bsvd_v_opz_var1: total deflations observed = %d\n", total_deflations );
1101 printf( "FLA_Bsvd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
1102#endif
1103
1104 // Store the most recent value of ijBR in m_G_sweep_max.
1105 // When the sweep is done, this value will contain the minimum
1106 // number of rows of G and H we can apply and safely include all
1107 // non-identity rotations that were computed during the
1108 // singular value searches.
1110
1111 // Make sure we haven't exceeded our maximum iteration count.
1112 if ( n_iter_prev >= n_iter_max * min_m_n )
1113 {
1114#ifdef PRINTF
1115 printf( "FLA_Bsvd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
1116#endif
1117 FLA_Abort();
1118 //return FLA_FAILURE;
1119 }
1120 }
1121
1122 // The sweep is complete. Now we must apply the Givens rotations
1123 // that were accumulated during the sweep.
1124
1125 // Recall that the number of columns of U and V to which we apply
1126 // rotations is one more than the number of rotations.
1128
1129#ifdef PRINTF
1130 printf( "FLA_Bsvd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
1131 printf( "FLA_Bsvd_v_opz_var1: m_U = %d\n", m_U );
1132 printf( "FLA_Bsvd_v_opz_var1: m_V = %d\n", m_V );
1133 printf( "FLA_Bsvd_v_opz_var1: napp= %d\n", n_UV_apply );
1134#endif
1135 // Apply the Givens rotations. Note that we only apply k sets of
1136 // rotations, where k = n_iter_perf_sweep_max. Also note that we only
1137 // apply to n_UV_apply columns of U and V since this is the most we
1138 // need to touch given the most recent value stored to m_GH_sweep_max.
1139 //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1141 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1142 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1143 m_U,
1144 n_UV_apply,
1145 buff_G, rs_G, cs_G,
1146 buff_U, rs_U, cs_U,
1147 b_alg );
1148 //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1150 //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1151 //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1152 m_V,
1153 n_UV_apply,
1154 buff_H, rs_H, cs_H,
1155 buff_V, rs_V, cs_V,
1156 b_alg );
1157
1158 // Increment the total number of iterations previously performed.
1160
1161#ifdef PRINTF
1162 printf( "FLA_Bsvd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
1163#endif
1164 }
1165
1166 // Make all the singular values positive.
1167 {
1168 int i;
1169 double minus_one = bl1_dm1();
1170
1171 for ( i = 0; i < min_m_n; ++i )
1172 {
1173 if ( buff_d[ (i )*inc_d ] < rzero )
1174 {
1175 buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
1176
1177 // Scale the right singular vectors.
1179 m_V,
1180 &minus_one,
1181 buff_V + (i )*cs_V, rs_V );
1182 }
1183 }
1184 }
1185
1186 return n_iter_prev;
1187}
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition FLA_Apply_G_rf_blk_var3.c:186
void bl1_zdscalv(conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
Definition bl1_scalv.c:61

References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().