Skip to content

Commit 87da616

Browse files
author
Ian
committed
intermediate push
1 parent 63ddace commit 87da616

File tree

4 files changed

+170
-98
lines changed

4 files changed

+170
-98
lines changed

src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pub mod legacy;
22
pub mod error;
33
mod new;
44
mod masked;
5+
pub(crate) mod utils;
56

67
pub use new::*;
78
pub use masked::*;
@@ -155,7 +156,7 @@ mod simple_comparison_tests {
155156
fn test_real_sparse_matrix() {
156157
// Create a matrix with similar sparsity to your real one (99.02%)
157158
let test_matrix = create_sparse_matrix(100, 100, 0.0098); // 0.98% non-zeros
158-
159+
159160
// Should no longer fail with convergence error
160161
let result = svd_dim_seed(&test_matrix, 50, 42); // Using your modified imtqlb
161162
assert!(result.is_ok(), "{}", format!("SVD failed on 99.02% sparse matrix, {:?}", result.err().unwrap()));

src/masked.rs

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
use crate::utils::determine_chunk_size;
12
use crate::{SMat, SvdFloat};
23
use nalgebra_sparse::CsrMatrix;
34
use num_traits::Float;
5+
use rayon::iter::ParallelIterator;
6+
use rayon::prelude::{IntoParallelIterator, ParallelBridge};
47
use std::ops::AddAssign;
58

69
pub struct MaskedCSRMatrix<'a, T: Float> {
@@ -47,20 +50,18 @@ impl<'a, T: Float> MaskedCSRMatrix<'a, T> {
4750
Self::new(matrix, mask)
4851
}
4952

50-
// Add this method to help with small matrix comparison tests
5153
pub fn uses_all_columns(&self) -> bool {
5254
self.masked_to_original.len() == self.matrix.ncols() && self.column_mask.iter().all(|&x| x)
5355
}
5456

55-
// Add this method for special case handling
5657
pub fn ensure_identical_results_mode(&self) -> bool {
5758
// For very small matrices where precision is critical
5859
let is_small_matrix = self.matrix.nrows() <= 5 && self.matrix.ncols() <= 5;
5960
is_small_matrix && self.uses_all_columns()
6061
}
6162
}
6263

63-
impl<'a, T: Float + AddAssign> SMat<T> for MaskedCSRMatrix<'a, T> {
64+
impl<'a, T: Float + AddAssign + Sync + Send> SMat<T> for MaskedCSRMatrix<'a, T> {
6465
fn nrows(&self) -> usize {
6566
self.matrix.nrows()
6667
}
@@ -75,7 +76,7 @@ impl<'a, T: Float + AddAssign> SMat<T> for MaskedCSRMatrix<'a, T> {
7576

7677
for i in 0..self.matrix.nrows() {
7778
for j in major_offsets[i]..major_offsets[i + 1] {
78-
let col = minor_indices[j]; // Fixed: Use j instead of i
79+
let col = minor_indices[j];
7980
if self.column_mask[col] {
8081
count += 1;
8182
}
@@ -85,6 +86,7 @@ impl<'a, T: Float + AddAssign> SMat<T> for MaskedCSRMatrix<'a, T> {
8586
}
8687

8788
fn svd_opa(&self, x: &[T], y: &mut [T], transposed: bool) {
89+
// TODO parallelize me please
8890
let nrows = if transposed {
8991
self.ncols()
9092
} else {
@@ -132,15 +134,26 @@ impl<'a, T: Float + AddAssign> SMat<T> for MaskedCSRMatrix<'a, T> {
132134
y[i] = sum;
133135
}
134136
} else {
135-
// Standard implementation for normal cases
136-
for i in 0..self.matrix.nrows() {
137-
for j in major_offsets[i]..major_offsets[i + 1] {
138-
let col = minor_indices[j];
139-
if let Some(masked_col) = self.original_to_masked[col] {
140-
y[i] += values[j] * x[masked_col];
137+
let chunk_size = determine_chunk_size(self.matrix.nrows());
138+
y.chunks_mut(chunk_size).enumerate().par_bridge().for_each(
139+
|(chunk_idx, y_chunk)| {
140+
let start_row = chunk_idx * chunk_size;
141+
let end_row = (start_row + y_chunk.len()).min(self.matrix.nrows());
142+
143+
for i in start_row..end_row {
144+
let row_idx = i - start_row;
145+
let mut sum = T::zero();
146+
147+
for j in major_offsets[i]..major_offsets[i + 1] {
148+
let col = minor_indices[j];
149+
if let Some(masked_col) = self.original_to_masked[col] {
150+
sum += values[j] * x[masked_col];
151+
};
152+
}
153+
y_chunk[row_idx] = sum;
141154
}
142-
}
143-
}
155+
},
156+
);
144157
}
145158
} else {
146159
// For the transposed case (A^T * x)
@@ -160,13 +173,35 @@ impl<'a, T: Float + AddAssign> SMat<T> for MaskedCSRMatrix<'a, T> {
160173
}
161174
}
162175
} else {
163-
// Existing implementation for transposed case
164-
for i in 0..self.matrix.nrows() {
165-
let row_val = x[i];
166-
for j in major_offsets[i]..major_offsets[i + 1] {
167-
let col = minor_indices[j];
168-
if let Some(masked_col) = self.original_to_masked[col] {
169-
y[masked_col] += values[j] * row_val;
176+
let nrows = self.matrix.nrows();
177+
let chunk_size = determine_chunk_size(nrows);
178+
let num_chunks = (nrows + chunk_size - 1) / chunk_size;
179+
let results: Vec<Vec<T>> = (0..chunk_size)
180+
.into_par_iter()
181+
.map(|chunk_idx| {
182+
let start = chunk_idx * chunk_size;
183+
let end = (start + chunk_size).min(nrows);
184+
185+
let mut local_y = vec![T::zero(); y.len()];
186+
for i in start..end {
187+
let row_val = x[i];
188+
for j in major_offsets[i]..major_offsets[i + 1] {
189+
let col = minor_indices[j];
190+
if let Some(masked_col) = self.original_to_masked[col] {
191+
local_y[masked_col] += values[j] * row_val;
192+
}
193+
}
194+
}
195+
local_y
196+
})
197+
.collect();
198+
199+
y.fill(T::zero());
200+
201+
for local_y in results {
202+
for (idx, val) in local_y.iter().enumerate() {
203+
if !val.is_zero() {
204+
y[idx] += *val;
170205
}
171206
}
172207
}

src/new.rs

Lines changed: 103 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ use rand::rngs::StdRng;
66
use rand::{thread_rng, Rng, SeedableRng};
77
use rayon::iter::IndexedParallelIterator;
88
use rayon::iter::ParallelIterator;
9-
use rayon::prelude::{IntoParallelRefIterator, IntoParallelRefMutIterator};
9+
use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator};
1010
use std::fmt::Debug;
1111
use std::iter::Sum;
1212
use std::mem;
1313
use std::ops::{AddAssign, MulAssign, Neg, SubAssign};
1414

15-
pub trait SMat<T: Float> {
15+
pub trait SMat<T: Float>: Sync {
1616
fn nrows(&self) -> usize;
1717
fn ncols(&self) -> usize;
1818
fn nnz(&self) -> usize;
@@ -420,9 +420,9 @@ fn svd_daxpy<T: Float + AddAssign + Send + Sync>(da: T, x: &[T], y: &mut [T]) {
420420
*yval += da * *xval
421421
}
422422
} else {
423-
y.par_iter_mut().zip(x.par_iter()).for_each(|(yval, xval)| {
424-
*yval += da * *xval
425-
});
423+
y.par_iter_mut()
424+
.zip(x.par_iter())
425+
.for_each(|(yval, xval)| *yval += da * *xval);
426426
}
427427
}
428428

@@ -1222,46 +1222,45 @@ fn ritvec<T: SvdFloat>(
12221222
kappa
12231223
};
12241224

1225-
let mut nsig = 0;
1226-
let mut x = 0;
1227-
let mut id2 = jsq - js;
1225+
let mut x = dimensions - 1;
12281226

1229-
let mut significant_count = 0;
1230-
for k in 0..js {
1231-
// Adaptive error bound check using relative tolerance
1232-
let relative_bound = adaptive_kappa * wrk.ritz[k].abs().max(max_eigenvalue * adaptive_eps);
1233-
if wrk.bnd[k] <= relative_bound && k + 1 > js - neig {
1234-
significant_count += 1;
1235-
}
1236-
}
1227+
let store_vectors: Vec<Vec<T>> = (0..js).map(|i| store.retrq(i).to_vec()).collect();
12371228

1238-
id2 = jsq - js;
1239-
for k in 0..js {
1240-
// Adaptive error bound check
1241-
let relative_bound = adaptive_kappa * wrk.ritz[k].abs().max(max_eigenvalue * adaptive_eps);
1242-
if wrk.bnd[k] <= relative_bound && k + 1 > js - neig {
1243-
x = match x {
1244-
0 => dimensions - 1,
1245-
_ => x - 1,
1246-
};
1229+
let significant_indices: Vec<usize> = (0..js)
1230+
.into_par_iter()
1231+
.filter(|&k| {
1232+
// Adaptive error bound check using relative tolerance
1233+
let relative_bound =
1234+
adaptive_kappa * wrk.ritz[k].abs().max(max_eigenvalue * adaptive_eps);
1235+
wrk.bnd[k] <= relative_bound && k + 1 > js - neig
1236+
})
1237+
.collect();
1238+
1239+
let nsig = significant_indices.len();
12471240

1248-
let offset = x * Vt.cols;
1249-
Vt.value[offset..offset + Vt.cols].fill(T::zero());
1250-
let mut idx = id2 + js;
1241+
let mut vt_vectors: Vec<(usize, Vec<T>)> = significant_indices
1242+
.into_par_iter()
1243+
.map(|k| {
1244+
let mut vec = vec![T::zero(); wrk.ncols];
1245+
let mut idx = (jsq - js) + k + 1;
12511246

12521247
for i in 0..js {
12531248
idx -= js;
12541249
// Non-zero check with adaptive threshold
12551250
if s[idx].abs() > adaptive_eps {
1256-
for (j, item) in store.retrq(i).iter().enumerate().take(Vt.cols) {
1257-
Vt.value[j + offset] += s[idx] * *item;
1251+
for (j, item) in store_vectors[i].iter().enumerate().take(wrk.ncols) {
1252+
vec[j] += s[idx] * *item;
12581253
}
12591254
}
12601255
}
1261-
nsig += 1;
1262-
}
1263-
id2 += 1;
1264-
}
1256+
1257+
// Return with position index (for proper ordering)
1258+
(k, vec)
1259+
})
1260+
.collect();
1261+
1262+
// Sort by k value to maintain original order
1263+
vt_vectors.sort_by_key(|(k, _)| *k);
12651264

12661265
// Rotate the singular vectors and values.
12671266
// `x` is now the location of the highest singular value.
@@ -1276,72 +1275,98 @@ fn ritvec<T: SvdFloat>(
12761275
cols: wrk.nrows,
12771276
value: vec![T::zero(); wrk.nrows * d],
12781277
};
1279-
Vt.value.resize(Vt.cols * d, T::zero());
1278+
let mut Vt = DMat {
1279+
cols: wrk.ncols,
1280+
value: vec![T::zero(); wrk.ncols * d],
1281+
};
12801282

1281-
let mut tmp_vec = vec![T::zero(); Vt.cols];
1282-
for (i, sval) in S.iter_mut().enumerate() {
1283+
for (i, (_, vec)) in vt_vectors.into_iter().take(d).enumerate() {
12831284
let vt_offset = i * Vt.cols;
1284-
let ut_offset = i * Ut.cols;
1285+
Vt.value[vt_offset..vt_offset + Vt.cols].copy_from_slice(&vec);
1286+
}
12851287

1288+
let d = dimensions.min(nsig);
1289+
let mut S = vec![T::zero(); d];
1290+
let mut Ut = DMat {
1291+
cols: wrk.nrows,
1292+
value: vec![T::zero(); wrk.nrows * d],
1293+
};
1294+
let mut Vt = DMat {
1295+
cols: wrk.ncols,
1296+
value: vec![T::zero(); wrk.ncols * d],
1297+
};
1298+
1299+
// Fill Vt with the vectors we computed
1300+
for (i, (_, vec)) in vt_vectors.into_iter().take(d).enumerate() {
1301+
let vt_offset = i * Vt.cols;
1302+
Vt.value[vt_offset..vt_offset + Vt.cols].copy_from_slice(&vec);
1303+
}
1304+
1305+
// Prepare for parallel computation of S and Ut
1306+
let mut ab_products = Vec::with_capacity(d);
1307+
let mut a_products = Vec::with_capacity(d);
1308+
1309+
// First compute all matrix-vector products sequentially
1310+
for i in 0..d {
1311+
let vt_offset = i * Vt.cols;
12861312
let vt_vec = &Vt.value[vt_offset..vt_offset + Vt.cols];
1287-
let ut_vec = &mut Ut.value[ut_offset..ut_offset + Ut.cols];
12881313

1289-
// Multiply by matrix B first
1314+
let mut tmp_vec = vec![T::zero(); Vt.cols];
1315+
let mut ut_vec = vec![T::zero(); wrk.nrows];
1316+
1317+
// Matrix-vector products with A and A'A
12901318
svd_opb(A, vt_vec, &mut tmp_vec, &mut wrk.temp, wrk.transposed);
1291-
let t = svd_ddot(vt_vec, &tmp_vec);
1292-
1293-
// Store the Singular Value at S[i], with safety check for negative values
1294-
// that can happen due to numerical precision
1295-
*sval = t.max(T::zero()).sqrt();
1296-
1297-
// Safety check for zero-division
1298-
if t > adaptive_eps {
1299-
svd_daxpy(-t, vt_vec, &mut tmp_vec);
1300-
// Protect against division by extremely small values
1301-
if *sval > adaptive_eps {
1302-
wrk.bnd[js] = svd_norm(&tmp_vec) / *sval;
1303-
} else {
1304-
wrk.bnd[js] = T::from_f64(f64::MAX).unwrap() * T::from_f64(0.1).unwrap();
1305-
}
1319+
A.svd_opa(vt_vec, &mut ut_vec, wrk.transposed);
13061320

1307-
// Multiply by matrix A to get (scaled) left s-vector
1308-
A.svd_opa(vt_vec, ut_vec, wrk.transposed);
1321+
ab_products.push(tmp_vec);
1322+
a_products.push(ut_vec);
1323+
}
13091324

1310-
// Safe scaling - avoid division by very small numbers
1311-
if *sval > adaptive_eps {
1312-
svd_dscal(T::one() / *sval, ut_vec);
1313-
} else {
1314-
// For extremely small singular values, use a bounded scaling factor
1315-
let dls = sval.max(adaptive_eps);
1316-
let safe_scale = T::one() / dls;
1317-
svd_dscal(safe_scale, ut_vec);
1318-
}
1325+
let results: Vec<(usize, T)> = (0..d)
1326+
.into_par_iter()
1327+
.map(|i| {
1328+
let vt_offset = i * Vt.cols;
1329+
let vt_vec = &Vt.value[vt_offset..vt_offset + Vt.cols];
1330+
let tmp_vec = &ab_products[i];
1331+
1332+
// Compute singular value
1333+
let t = svd_ddot(vt_vec, tmp_vec);
1334+
let sval = t.max(T::zero()).sqrt();
1335+
1336+
(i, sval)
1337+
})
1338+
.collect();
1339+
1340+
// Process results and scale the vectors
1341+
for (i, sval) in results {
1342+
S[i] = sval;
1343+
let ut_offset = i * Ut.cols;
1344+
let mut ut_vec = a_products[i].clone();
1345+
1346+
// Safe scaling - avoid division by very small numbers
1347+
if sval > adaptive_eps {
1348+
svd_dscal(T::one() / sval, &mut ut_vec);
13191349
} else {
1320-
// For effectively zero singular values, just use the right vector
1321-
// but scale it reasonably
1322-
A.svd_opa(vt_vec, ut_vec, wrk.transposed);
1323-
let norm = svd_norm(ut_vec);
1324-
if norm > adaptive_eps {
1325-
svd_dscal(T::one() / norm, ut_vec);
1326-
}
1327-
wrk.bnd[js] = T::from_f64(f64::MAX).unwrap() * T::from_f64(0.01).unwrap();
1350+
// For extremely small singular values, use a bounded scaling factor
1351+
let dls = sval.max(adaptive_eps);
1352+
let safe_scale = T::one() / dls;
1353+
svd_dscal(safe_scale, &mut ut_vec);
13281354
}
1355+
1356+
// Copy to output
1357+
Ut.value[ut_offset..ut_offset + Ut.cols].copy_from_slice(&ut_vec);
13291358
}
13301359

13311360
Ok(SVDRawRec {
13321361
// Dimensionality (rank)
13331362
d,
1334-
13351363
// Significant values
13361364
nsig,
1337-
13381365
// DMat Ut Transpose of left singular vectors. (d by m)
13391366
// The vectors are the rows of Ut.
13401367
Ut,
1341-
13421368
// Array of singular values. (length d)
13431369
S,
1344-
13451370
// DMat Vt Transpose of right singular vectors. (d by n)
13461371
// The vectors are the rows of Vt.
13471372
Vt,

0 commit comments

Comments
 (0)