@@ -6,13 +6,13 @@ use rand::rngs::StdRng;
66use rand:: { thread_rng, Rng , SeedableRng } ;
77use rayon:: iter:: IndexedParallelIterator ;
88use rayon:: iter:: ParallelIterator ;
9- use rayon:: prelude:: { IntoParallelRefIterator , IntoParallelRefMutIterator } ;
9+ use rayon:: prelude:: { IntoParallelIterator , IntoParallelRefIterator , IntoParallelRefMutIterator } ;
1010use std:: fmt:: Debug ;
1111use std:: iter:: Sum ;
1212use std:: mem;
1313use std:: ops:: { AddAssign , MulAssign , Neg , SubAssign } ;
1414
15- pub trait SMat < T : Float > {
15+ pub trait SMat < T : Float > : Sync {
1616 fn nrows ( & self ) -> usize ;
1717 fn ncols ( & self ) -> usize ;
1818 fn nnz ( & self ) -> usize ;
@@ -420,9 +420,9 @@ fn svd_daxpy<T: Float + AddAssign + Send + Sync>(da: T, x: &[T], y: &mut [T]) {
420420 * yval += da * * xval
421421 }
422422 } else {
423- y. par_iter_mut ( ) . zip ( x . par_iter ( ) ) . for_each ( | ( yval , xval ) | {
424- * yval += da * * xval
425- } ) ;
423+ y. par_iter_mut ( )
424+ . zip ( x . par_iter ( ) )
425+ . for_each ( | ( yval , xval ) | * yval += da * * xval ) ;
426426 }
427427}
428428
@@ -1222,46 +1222,45 @@ fn ritvec<T: SvdFloat>(
12221222 kappa
12231223 } ;
12241224
1225- let mut nsig = 0 ;
1226- let mut x = 0 ;
1227- let mut id2 = jsq - js;
1225+ let mut x = dimensions - 1 ;
12281226
1229- let mut significant_count = 0 ;
1230- for k in 0 ..js {
1231- // Adaptive error bound check using relative tolerance
1232- let relative_bound = adaptive_kappa * wrk. ritz [ k] . abs ( ) . max ( max_eigenvalue * adaptive_eps) ;
1233- if wrk. bnd [ k] <= relative_bound && k + 1 > js - neig {
1234- significant_count += 1 ;
1235- }
1236- }
1227+ let store_vectors: Vec < Vec < T > > = ( 0 ..js) . map ( |i| store. retrq ( i) . to_vec ( ) ) . collect ( ) ;
12371228
1238- id2 = jsq - js;
1239- for k in 0 ..js {
1240- // Adaptive error bound check
1241- let relative_bound = adaptive_kappa * wrk. ritz [ k] . abs ( ) . max ( max_eigenvalue * adaptive_eps) ;
1242- if wrk. bnd [ k] <= relative_bound && k + 1 > js - neig {
1243- x = match x {
1244- 0 => dimensions - 1 ,
1245- _ => x - 1 ,
1246- } ;
1229+ let significant_indices: Vec < usize > = ( 0 ..js)
1230+ . into_par_iter ( )
1231+ . filter ( |& k| {
1232+ // Adaptive error bound check using relative tolerance
1233+ let relative_bound =
1234+ adaptive_kappa * wrk. ritz [ k] . abs ( ) . max ( max_eigenvalue * adaptive_eps) ;
1235+ wrk. bnd [ k] <= relative_bound && k + 1 > js - neig
1236+ } )
1237+ . collect ( ) ;
1238+
1239+ let nsig = significant_indices. len ( ) ;
12471240
1248- let offset = x * Vt . cols ;
1249- Vt . value [ offset..offset + Vt . cols ] . fill ( T :: zero ( ) ) ;
1250- let mut idx = id2 + js;
1241+ let mut vt_vectors: Vec < ( usize , Vec < T > ) > = significant_indices
1242+ . into_par_iter ( )
1243+ . map ( |k| {
1244+ let mut vec = vec ! [ T :: zero( ) ; wrk. ncols] ;
1245+ let mut idx = ( jsq - js) + k + 1 ;
12511246
12521247 for i in 0 ..js {
12531248 idx -= js;
12541249 // Non-zero check with adaptive threshold
12551250 if s[ idx] . abs ( ) > adaptive_eps {
1256- for ( j, item) in store . retrq ( i ) . iter ( ) . enumerate ( ) . take ( Vt . cols ) {
1257- Vt . value [ j + offset ] += s[ idx] * * item;
1251+ for ( j, item) in store_vectors [ i ] . iter ( ) . enumerate ( ) . take ( wrk . ncols ) {
1252+ vec [ j ] += s[ idx] * * item;
12581253 }
12591254 }
12601255 }
1261- nsig += 1 ;
1262- }
1263- id2 += 1 ;
1264- }
1256+
1257+ // Return with position index (for proper ordering)
1258+ ( k, vec)
1259+ } )
1260+ . collect ( ) ;
1261+
1262+ // Sort by k value to maintain original order
1263+ vt_vectors. sort_by_key ( |( k, _) | * k) ;
12651264
12661265 // Rotate the singular vectors and values.
12671266 // `x` is now the location of the highest singular value.
@@ -1276,72 +1275,98 @@ fn ritvec<T: SvdFloat>(
12761275 cols : wrk. nrows ,
12771276 value : vec ! [ T :: zero( ) ; wrk. nrows * d] ,
12781277 } ;
1279- Vt . value . resize ( Vt . cols * d, T :: zero ( ) ) ;
1278+ let mut Vt = DMat {
1279+ cols : wrk. ncols ,
1280+ value : vec ! [ T :: zero( ) ; wrk. ncols * d] ,
1281+ } ;
12801282
1281- let mut tmp_vec = vec ! [ T :: zero( ) ; Vt . cols] ;
1282- for ( i, sval) in S . iter_mut ( ) . enumerate ( ) {
1283+ for ( i, ( _, vec) ) in vt_vectors. into_iter ( ) . take ( d) . enumerate ( ) {
12831284 let vt_offset = i * Vt . cols ;
1284- let ut_offset = i * Ut . cols ;
1285+ Vt . value [ vt_offset..vt_offset + Vt . cols ] . copy_from_slice ( & vec) ;
1286+ }
12851287
1288+ let d = dimensions. min ( nsig) ;
1289+ let mut S = vec ! [ T :: zero( ) ; d] ;
1290+ let mut Ut = DMat {
1291+ cols : wrk. nrows ,
1292+ value : vec ! [ T :: zero( ) ; wrk. nrows * d] ,
1293+ } ;
1294+ let mut Vt = DMat {
1295+ cols : wrk. ncols ,
1296+ value : vec ! [ T :: zero( ) ; wrk. ncols * d] ,
1297+ } ;
1298+
1299+ // Fill Vt with the vectors we computed
1300+ for ( i, ( _, vec) ) in vt_vectors. into_iter ( ) . take ( d) . enumerate ( ) {
1301+ let vt_offset = i * Vt . cols ;
1302+ Vt . value [ vt_offset..vt_offset + Vt . cols ] . copy_from_slice ( & vec) ;
1303+ }
1304+
1305+ // Prepare for parallel computation of S and Ut
1306+ let mut ab_products = Vec :: with_capacity ( d) ;
1307+ let mut a_products = Vec :: with_capacity ( d) ;
1308+
1309+ // First compute all matrix-vector products sequentially
1310+ for i in 0 ..d {
1311+ let vt_offset = i * Vt . cols ;
12861312 let vt_vec = & Vt . value [ vt_offset..vt_offset + Vt . cols ] ;
1287- let ut_vec = & mut Ut . value [ ut_offset..ut_offset + Ut . cols ] ;
12881313
1289- // Multiply by matrix B first
1314+ let mut tmp_vec = vec ! [ T :: zero( ) ; Vt . cols] ;
1315+ let mut ut_vec = vec ! [ T :: zero( ) ; wrk. nrows] ;
1316+
1317+ // Matrix-vector products with A and A'A
12901318 svd_opb ( A , vt_vec, & mut tmp_vec, & mut wrk. temp , wrk. transposed ) ;
1291- let t = svd_ddot ( vt_vec, & tmp_vec) ;
1292-
1293- // Store the Singular Value at S[i], with safety check for negative values
1294- // that can happen due to numerical precision
1295- * sval = t. max ( T :: zero ( ) ) . sqrt ( ) ;
1296-
1297- // Safety check for zero-division
1298- if t > adaptive_eps {
1299- svd_daxpy ( -t, vt_vec, & mut tmp_vec) ;
1300- // Protect against division by extremely small values
1301- if * sval > adaptive_eps {
1302- wrk. bnd [ js] = svd_norm ( & tmp_vec) / * sval;
1303- } else {
1304- wrk. bnd [ js] = T :: from_f64 ( f64:: MAX ) . unwrap ( ) * T :: from_f64 ( 0.1 ) . unwrap ( ) ;
1305- }
1319+ A . svd_opa ( vt_vec, & mut ut_vec, wrk. transposed ) ;
13061320
1307- // Multiply by matrix A to get (scaled) left s-vector
1308- A . svd_opa ( vt_vec, ut_vec, wrk. transposed ) ;
1321+ ab_products. push ( tmp_vec) ;
1322+ a_products. push ( ut_vec) ;
1323+ }
13091324
1310- // Safe scaling - avoid division by very small numbers
1311- if * sval > adaptive_eps {
1312- svd_dscal ( T :: one ( ) / * sval, ut_vec) ;
1313- } else {
1314- // For extremely small singular values, use a bounded scaling factor
1315- let dls = sval. max ( adaptive_eps) ;
1316- let safe_scale = T :: one ( ) / dls;
1317- svd_dscal ( safe_scale, ut_vec) ;
1318- }
1325+ let results: Vec < ( usize , T ) > = ( 0 ..d)
1326+ . into_par_iter ( )
1327+ . map ( |i| {
1328+ let vt_offset = i * Vt . cols ;
1329+ let vt_vec = & Vt . value [ vt_offset..vt_offset + Vt . cols ] ;
1330+ let tmp_vec = & ab_products[ i] ;
1331+
1332+ // Compute singular value
1333+ let t = svd_ddot ( vt_vec, tmp_vec) ;
1334+ let sval = t. max ( T :: zero ( ) ) . sqrt ( ) ;
1335+
1336+ ( i, sval)
1337+ } )
1338+ . collect ( ) ;
1339+
1340+ // Process results and scale the vectors
1341+ for ( i, sval) in results {
1342+ S [ i] = sval;
1343+ let ut_offset = i * Ut . cols ;
1344+ let mut ut_vec = a_products[ i] . clone ( ) ;
1345+
1346+ // Safe scaling - avoid division by very small numbers
1347+ if sval > adaptive_eps {
1348+ svd_dscal ( T :: one ( ) / sval, & mut ut_vec) ;
13191349 } else {
1320- // For effectively zero singular values, just use the right vector
1321- // but scale it reasonably
1322- A . svd_opa ( vt_vec, ut_vec, wrk. transposed ) ;
1323- let norm = svd_norm ( ut_vec) ;
1324- if norm > adaptive_eps {
1325- svd_dscal ( T :: one ( ) / norm, ut_vec) ;
1326- }
1327- wrk. bnd [ js] = T :: from_f64 ( f64:: MAX ) . unwrap ( ) * T :: from_f64 ( 0.01 ) . unwrap ( ) ;
1350+ // For extremely small singular values, use a bounded scaling factor
1351+ let dls = sval. max ( adaptive_eps) ;
1352+ let safe_scale = T :: one ( ) / dls;
1353+ svd_dscal ( safe_scale, & mut ut_vec) ;
13281354 }
1355+
1356+ // Copy to output
1357+ Ut . value [ ut_offset..ut_offset + Ut . cols ] . copy_from_slice ( & ut_vec) ;
13291358 }
13301359
13311360 Ok ( SVDRawRec {
13321361 // Dimensionality (rank)
13331362 d,
1334-
13351363 // Significant values
13361364 nsig,
1337-
13381365 // DMat Ut Transpose of left singular vectors. (d by m)
13391366 // The vectors are the rows of Ut.
13401367 Ut ,
1341-
13421368 // Array of singular values. (length d)
13431369 S ,
1344-
13451370 // DMat Vt Transpose of right singular vectors. (d by n)
13461371 // The vectors are the rows of Vt.
13471372 Vt ,
0 commit comments