Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/main/java/edu/ucsd/msjava/fragindex/DirectStore.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package edu.ucsd.msjava.fragindex;

/**
 * Array-backed {@link FragmentIndexStore} that holds every slab in memory.
 *
 * <p>Each slab is kept as an ordinary {@link Slab} reference on the Java heap,
 * in a fixed-size table indexed by slab id. An off-heap backing (a direct
 * {@code java.nio.ByteBuffer}) is planned for Phase 4 so the working set can
 * stay outside {@code -Xmx}.
 *
 * <p>In Phase 1 this is the only store implementation. It serves the unit
 * tests and in-memory search runs against small FASTA files.
 *
 * <p>Reads and writes go straight to the backing array with no locking.
 */
public final class DirectStore implements FragmentIndexStore {
    /** One slot per slab id; a slot stays {@code null} until a slab is put there. */
    private final Slab[] slots;

    /**
     * Creates a store with room for exactly {@code slabCount} slabs.
     *
     * @param slabCount number of slab slots to allocate
     * @throws NegativeArraySizeException if {@code slabCount} is negative
     */
    public DirectStore(int slabCount) {
        slots = new Slab[slabCount];
    }

    /** @return the number of slots fixed at construction */
    @Override
    public int slabCount() {
        return slots.length;
    }

    /**
     * Stores {@code slab} in the slot for {@code slabId}, replacing any
     * previous occupant.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code slabId} is outside
     *         {@code [0, slabCount())}
     */
    @Override
    public void putSlab(int slabId, Slab slab) {
        slots[slabId] = slab;
    }

    /**
     * Looks up the slab stored under {@code slabId}.
     *
     * @return the slab, or {@code null} when nothing has been put in that slot
     * @throws ArrayIndexOutOfBoundsException if {@code slabId} is outside
     *         {@code [0, slabCount())}
     */
    @Override
    public Slab openSlab(int slabId) {
        Slab found = slots[slabId];
        return found;
    }
}
63 changes: 63 additions & 0 deletions src/main/java/edu/ucsd/msjava/fragindex/EliasFano.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package edu.ucsd.msjava.fragindex;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * Simple Elias-Fano-inspired codec for sorted non-decreasing {@code int[]} lists.
 *
 * <p>Layout (little-endian):
 * <pre>
 *   [4 bytes: length N]
 *   [4 bytes: max value U (the last element), or 0 if N == 0]
 *   [N x 4 bytes: raw int values, in input order]
 * </pre>
 *
 * <p>This first cut is correctness-only: a plain int-array encoding. A compact
 * Elias-Fano layout is meant to replace it in Task 7 once the API shape is
 * stable.
 *
 * <p>{@link #encode(int[])} does not verify that its input is sorted; the
 * header's "max value" is simply the last element, so callers must pass a
 * non-decreasing list for that field to be meaningful.
 */
public final class EliasFano {
    /** Size of the fixed header: 4-byte length followed by 4-byte max value. */
    private static final int HEADER_BYTES = 2 * Integer.BYTES;

    private EliasFano() {}

    /**
     * Encodes a list of ints into the layout described on this class.
     *
     * @param values the list to encode; expected to be sorted non-decreasing
     *               (not checked)
     * @return a freshly allocated array of {@code 8 + 4 * values.length} bytes
     * @throws NullPointerException if {@code values} is null
     */
    public static byte[] encode(int[] values) {
        int n = values.length;
        ByteBuffer buf = ByteBuffer.allocate(HEADER_BYTES + Integer.BYTES * n)
                .order(ByteOrder.LITTLE_ENDIAN);
        buf.putInt(n);
        buf.putInt(n == 0 ? 0 : values[n - 1]);
        for (int v : values) {
            buf.putInt(v);
        }
        return buf.array();
    }

    /**
     * Decodes a byte array produced by {@link #encode(int[])} back into its
     * values.
     *
     * @param encoded the encoded list
     * @return a new array holding the decoded values in their original order
     * @throws IllegalArgumentException if the header is truncated, the stored
     *         length is negative, or the payload is shorter than the stored
     *         length requires
     * @throws NullPointerException if {@code encoded} is null
     */
    public static int[] decode(byte[] encoded) {
        ByteBuffer buf = ByteBuffer.wrap(encoded).order(ByteOrder.LITTLE_ENDIAN);
        int n = readHeader(buf);
        int[] out = new int[n];
        // The int view inherits the little-endian order and starts at the
        // first payload int, so one bulk get replaces the per-element loop.
        buf.asIntBuffer().get(out);
        return out;
    }

    /**
     * Forward-only reader over an encoded list, obtained from
     * {@link EliasFano#open(byte[])}. Not safe for use by multiple threads:
     * it advances a private buffer position on every call to {@link #next()}.
     */
    public static final class Cursor {
        private final ByteBuffer buf;
        private final int total;
        private int index;

        Cursor(ByteBuffer buf, int total) {
            this.buf = buf;
            this.total = total;
            this.index = 0;
        }

        /** @return {@code true} while at least one value remains to be read */
        public boolean hasNext() {
            return index < total;
        }

        /**
         * Returns the next value and advances the cursor.
         *
         * @throws java.util.NoSuchElementException if the cursor is exhausted
         */
        public int next() {
            // Guard explicitly: without it an exhausted cursor would either
            // underflow the buffer or hand back trailing bytes as a value.
            if (index >= total) {
                throw new java.util.NoSuchElementException(
                        "Cursor exhausted after " + total + " values");
            }
            int v = buf.getInt();
            index++;
            return v;
        }
    }

    /**
     * Opens a streaming cursor over an encoded list without materialising the
     * whole list as an array.
     *
     * @param encoded the encoded list; it is wrapped, not copied, so it must
     *                not be modified while the cursor is in use
     * @return a cursor positioned before the first value
     * @throws IllegalArgumentException if the header is truncated, the stored
     *         length is negative, or the payload is shorter than the stored
     *         length requires
     * @throws NullPointerException if {@code encoded} is null
     */
    public static Cursor open(byte[] encoded) {
        ByteBuffer buf = ByteBuffer.wrap(encoded).order(ByteOrder.LITTLE_ENDIAN);
        int n = readHeader(buf);
        return new Cursor(buf, n);
    }

    /**
     * Consumes and validates the 8-byte header, leaving {@code buf} positioned
     * at the first payload int.
     *
     * @return the element count stored in the header
     */
    private static int readHeader(ByteBuffer buf) {
        if (buf.remaining() < HEADER_BYTES) {
            throw new IllegalArgumentException(
                    "Encoded list is truncated: expected at least " + HEADER_BYTES
                            + " header bytes, got " + buf.remaining());
        }
        int n = buf.getInt();
        buf.getInt(); // max value; unused in this naive layout
        int available = buf.remaining() / Integer.BYTES;
        // Checked before any allocation so a corrupt length cannot trigger a
        // huge or negative-sized array.
        if (n < 0 || n > available) {
            throw new IllegalArgumentException(
                    "Encoded list declares " + n + " values but payload holds " + available);
        }
        return n;
    }
}
43 changes: 43 additions & 0 deletions src/main/java/edu/ucsd/msjava/fragindex/Fingerprint128.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package edu.ucsd.msjava.fragindex;

/**
 * Mutable 128-bit fragment fingerprint made of two 64-bit words: one for
 * b-ion buckets and one for y-ion buckets.
 *
 * <p>Recording a fragment sets a single bit in the matching word; the bit
 * position is the bucket index reduced to the range {@code 0..63}. Two
 * fingerprints are compared by ANDing the words pairwise and counting the
 * surviving bits, which lets a search prune peptides that share too few bits
 * with a spectrum before any fragment-index lookup runs.
 *
 * <p>The pruning threshold (default {@code popcountAnd >= 8}) is chosen by
 * the caller, not by this class.
 */
public final class Fingerprint128 {
    /** b-ion bits. */
    private long lo;
    /** y-ion bits. */
    private long hi;

    /** Creates a fingerprint with no bits set. */
    public Fingerprint128() {
        this(0L, 0L);
    }

    /**
     * Creates a fingerprint from raw words.
     *
     * @param lo the b-ion word
     * @param hi the y-ion word
     */
    public Fingerprint128(long lo, long hi) {
        this.lo = lo;
        this.hi = hi;
    }

    /** Single-bit mask for a bucket, using the bucket index modulo 64 (always 0..63). */
    private static long bitFor(int bucketIndex) {
        int position = Math.floorMod(bucketIndex, Long.SIZE);
        return 1L << position;
    }

    /** Sets the b-ion bit that {@code bucketIndex} maps to. */
    public void setBIonBucket(int bucketIndex) {
        lo = lo | bitFor(bucketIndex);
    }

    /** Sets the y-ion bit that {@code bucketIndex} maps to. */
    public void setYIonBucket(int bucketIndex) {
        hi = hi | bitFor(bucketIndex);
    }

    /** @return how many b-ion bits are set */
    public int popcountB() {
        return Long.bitCount(lo);
    }

    /** @return how many y-ion bits are set */
    public int popcountY() {
        return Long.bitCount(hi);
    }

    /**
     * Counts the bits set in both this fingerprint and {@code other}, summed
     * over the b-ion and y-ion words.
     */
    public int popcountAnd(Fingerprint128 other) {
        long sharedB = this.lo & other.lo;
        long sharedY = this.hi & other.hi;
        return Long.bitCount(sharedB) + Long.bitCount(sharedY);
    }

    /** @return the raw b-ion word */
    public long loBits() {
        return lo;
    }

    /** @return the raw y-ion word */
    public long hiBits() {
        return hi;
    }
}
23 changes: 23 additions & 0 deletions src/main/java/edu/ucsd/msjava/fragindex/FragmentIndexStore.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package edu.ucsd.msjava.fragindex;

/**
* Storage backend for fragment-index slabs. Implementations differ in where
* slab data lives: {@link DirectStore} keeps slabs in memory (its own Javadoc
* describes them as ordinary {@link Slab} objects on the Java heap for now,
* with an off-heap backing planned for Phase 4), and a disk-backed mmap store
* is to be added in Phase 4.
*
* <p>Expected lifecycle: a build step calls {@link #putSlab(int, Slab)} once
* for each slab id, after which readers call {@link #openSlab(int)}.
*
* <p>All methods must be thread-safe for concurrent readers after
* {@link #putSlab(int, Slab)} has been called for each slab during build.
*
* <p>This interface does not specify what happens for slab ids outside
* {@code [0, slabCount())}; that is left to the implementation.
*/
public interface FragmentIndexStore {
/**
* Total number of slabs this store holds (set at construction).
*
* @return the slab capacity of the store
*/
int slabCount();

/**
* Install a slab at the given id. Called once per slab during build.
*
* @param slabId id of the slot to fill
* @param slab the slab to install
*/
void putSlab(int slabId, Slab slab);

/**
* Return the slab at the given id, or null if none has been put yet.
*
* @param slabId id of the slab to look up
* @return the installed slab, or {@code null} if that id has not been filled
*/
Slab openSlab(int slabId);

/**
* Optional hint that the caller has finished with the slab. Default: no-op.
*
* @param slab a slab previously obtained from {@link #openSlab(int)}
*/
default void closeSlab(Slab slab) {}
}
70 changes: 70 additions & 0 deletions src/main/java/edu/ucsd/msjava/fragindex/Slab.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package edu.ucsd.msjava.fragindex;

/**
 * Immutable read-only view over one precursor-mass slab of the fragment index.
 *
 * <p>Returned by {@link SlabBuilder#finish()} once all peptides and fragments
 * are loaded. The constructor snapshots the fingerprint bits into private
 * {@code long[]} arrays, so later changes to the {@link Fingerprint128}
 * objects or the array passed in cannot alter the slab. The bucket table is
 * copied shallowly: the outer array is private to the slab, but the encoded
 * {@code byte[]} payloads are shared with the caller and must not be modified
 * after construction.
 *
 * <p>All state is held in final fields that are never written after
 * construction, so instances are safe for concurrent readers.
 */
public final class Slab {
    /** Shared encoding of the empty list, reused by every empty cursor. */
    private static final byte[] EMPTY_ENCODED = EliasFano.encode(new int[0]);

    private final int slabId;
    private final double minMassDa;
    private final double maxMassDa;
    private final int peptideCount;
    private final long[] fingerprintLo;   // peptide id -> b-ion fingerprint word
    private final long[] fingerprintHi;   // peptide id -> y-ion fingerprint word
    private final byte[][] bucketEncoded; // bucket -> Elias-Fano-encoded peptide-id list

    /**
     * Builds a slab from the builder's accumulated state.
     *
     * @param slabId        id of this slab
     * @param minMassDa     lower precursor-mass bound, stored as given
     * @param maxMassDa     upper precursor-mass bound, stored as given
     * @param fingerprints  one fingerprint per peptide, indexed by peptide id;
     *                      its bits are copied out
     * @param bucketEncoded encoded peptide-id list per bucket; entries may be
     *                      {@code null} for buckets with no peptides
     * @throws NullPointerException if either array, or any fingerprint, is null
     */
    Slab(int slabId, double minMassDa, double maxMassDa,
         Fingerprint128[] fingerprints, byte[][] bucketEncoded) {
        this.slabId = slabId;
        this.minMassDa = minMassDa;
        this.maxMassDa = maxMassDa;
        this.peptideCount = fingerprints.length;

        // Flatten into primitive arrays: this is the defensive copy that makes
        // the immutability claim hold even though Fingerprint128 is mutable.
        long[] lo = new long[fingerprints.length];
        long[] hi = new long[fingerprints.length];
        for (int i = 0; i < fingerprints.length; i++) {
            Fingerprint128 fp = fingerprints[i];
            lo[i] = fp.loBits();
            hi[i] = fp.hiBits();
        }
        this.fingerprintLo = lo;
        this.fingerprintHi = hi;
        this.bucketEncoded = bucketEncoded.clone();
    }

    public int slabId() { return slabId; }
    public double minMassDa() { return minMassDa; }
    public double maxMassDa() { return maxMassDa; }
    public int peptideCount() { return peptideCount; }

    /**
     * Returns the fingerprint bits for the given peptide as a fresh
     * {@link Fingerprint128}. Mutating the returned object has no effect on
     * the slab. Callers that only need bit-level AND+popcount can use
     * {@link #fingerprintLoBits(int)} / {@link #fingerprintHiBits(int)}
     * for zero-allocation access.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code peptideId} is outside
     *         {@code [0, peptideCount())}
     */
    public Fingerprint128 fingerprint(int peptideId) {
        return new Fingerprint128(fingerprintLo[peptideId], fingerprintHi[peptideId]);
    }

    /** Zero-allocation read of the b-ion fingerprint word for a peptide. */
    public long fingerprintLoBits(int peptideId) {
        return fingerprintLo[peptideId];
    }

    /** Zero-allocation read of the y-ion fingerprint word for a peptide. */
    public long fingerprintHiBits(int peptideId) {
        return fingerprintHi[peptideId];
    }

    /**
     * Decodes the full peptide-id list for a bucket.
     *
     * @return a new array of peptide ids; empty when the bucket is out of
     *         range or holds no peptides
     */
    public int[] peptidesInBucket(int bucket) {
        byte[] enc = encodedOrNull(bucket);
        return enc == null ? new int[0] : EliasFano.decode(enc);
    }

    /**
     * Opens a streaming cursor over a bucket's peptide-id list.
     *
     * @return a new cursor; it is already exhausted when the bucket is out of
     *         range or holds no peptides
     */
    public EliasFano.Cursor bucketCursor(int bucket) {
        byte[] enc = encodedOrNull(bucket);
        // Each open() wraps the bytes in its own buffer, so sharing the empty
        // encoding between cursors is safe and avoids re-encoding on every miss.
        return EliasFano.open(enc == null ? EMPTY_ENCODED : enc);
    }

    /** Encoded list for {@code bucket}, or {@code null} if out of range or unpopulated. */
    private byte[] encodedOrNull(int bucket) {
        if (bucket < 0 || bucket >= bucketEncoded.length) {
            return null;
        }
        return bucketEncoded[bucket];
    }
}
72 changes: 72 additions & 0 deletions src/main/java/edu/ucsd/msjava/fragindex/SlabBuilder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package edu.ucsd.msjava.fragindex;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.TreeMap;

/**
 * Writable buffer for assembling a single slab of the fragment index.
 *
 * <p>Used during index build. Caller flow:
 * <pre>
 *   SlabBuilder b = new SlabBuilder(slabId, minMassDa, maxMassDa);
 *   int pid = b.addPeptide(peptideMassDa);
 *   b.addFragment(pid, fragmentBucket, isB);
 *   ...
 *   Slab slab = b.finish();
 * </pre>
 *
 * <p>A builder is single-use: once {@link #finish()} has been called, every
 * further call throws {@link IllegalStateException}.
 *
 * <p>Not thread-safe. Each builder is owned by a single build thread.
 */
public final class SlabBuilder {
    private final int slabId;
    private final double minMassDa;
    private final double maxMassDa;
    /** Fingerprint per peptide; the list index is the peptide id. */
    private final List<Fingerprint128> fingerprints = new ArrayList<>();
    /** Sorted by bucket so the largest bucket index is available via {@code lastKey()}. */
    private final TreeMap<Integer, List<Integer>> bucketToPeptides = new TreeMap<>();
    private boolean finished;

    /**
     * @param slabId    id given to the slab this builder produces
     * @param minMassDa lower precursor-mass bound, passed through to the slab
     * @param maxMassDa upper precursor-mass bound, passed through to the slab
     */
    public SlabBuilder(int slabId, double minMassDa, double maxMassDa) {
        this.slabId = slabId;
        this.minMassDa = minMassDa;
        this.maxMassDa = maxMassDa;
    }

    /**
     * Registers a new peptide with an empty fingerprint.
     *
     * @param precursorMassDa the peptide's precursor mass; currently neither
     *                        stored nor checked against the slab's mass bounds
     * @return the id assigned to the peptide (ids are consecutive from 0)
     * @throws IllegalStateException if {@link #finish()} was already called
     */
    public int addPeptide(double precursorMassDa) {
        requireNotFinished();
        int pid = fingerprints.size();
        fingerprints.add(new Fingerprint128());
        return pid;
    }

    /**
     * Records one fragment of a peptide: sets the matching fingerprint bit and
     * appends the peptide id to the bucket's posting list. Repeated calls for
     * the same peptide and bucket append the id again.
     *
     * @param peptideId id returned by {@link #addPeptide(double)}
     * @param bucket    fragment bucket index; must be non-negative because it
     *                  becomes an array index in the finished slab
     * @param isB       {@code true} for a b-ion fragment, {@code false} for y-ion
     * @throws IllegalArgumentException  if {@code bucket} is negative
     * @throws IndexOutOfBoundsException if {@code peptideId} was never issued
     * @throws IllegalStateException     if {@link #finish()} was already called
     */
    public void addFragment(int peptideId, int bucket, boolean isB) {
        requireNotFinished();
        // Fail fast, before touching any state: a negative bucket would
        // otherwise surface only inside finish(), after the builder has
        // already been marked finished and can no longer be used.
        if (bucket < 0) {
            throw new IllegalArgumentException("bucket must be non-negative, got " + bucket);
        }
        Fingerprint128 fp = fingerprints.get(peptideId);
        if (isB) {
            fp.setBIonBucket(bucket);
        } else {
            fp.setYIonBucket(bucket);
        }
        bucketToPeptides.computeIfAbsent(bucket, k -> new ArrayList<>()).add(peptideId);
    }

    /**
     * Freezes the builder and produces the slab. Each bucket's peptide ids are
     * sorted ascending and encoded with {@link EliasFano#encode(int[])};
     * buckets that received no fragments are left {@code null} in the table.
     *
     * @return the finished slab
     * @throws IllegalStateException if called more than once
     */
    public Slab finish() {
        requireNotFinished();
        finished = true;
        // The table is dense: it spans bucket 0 through the largest bucket seen.
        int maxBucket = bucketToPeptides.isEmpty() ? 0 : bucketToPeptides.lastKey();
        byte[][] bucketEncoded = new byte[maxBucket + 1][];
        for (var entry : bucketToPeptides.entrySet()) {
            List<Integer> pids = entry.getValue();
            Collections.sort(pids);
            int[] arr = new int[pids.size()];
            for (int i = 0; i < arr.length; i++) {
                arr[i] = pids.get(i);
            }
            bucketEncoded[entry.getKey()] = EliasFano.encode(arr);
        }
        Fingerprint128[] fpArr = fingerprints.toArray(new Fingerprint128[0]);
        return new Slab(slabId, minMassDa, maxMassDa, fpArr, bucketEncoded);
    }

    private void requireNotFinished() {
        if (finished) {
            throw new IllegalStateException("SlabBuilder is single-use; finish() already called");
        }
    }
}
28 changes: 28 additions & 0 deletions src/test/java/edu/ucsd/msjava/fragindex/TestDirectStore.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package edu.ucsd.msjava.fragindex;

import org.junit.Assert;
import org.junit.Test;

/** Unit tests for {@link DirectStore}: storing a built slab and reading an empty slot. */
public class TestDirectStore {

    /** A slab put at id 0 comes back with its id, peptide count and bucket contents intact. */
    @Test
    public void putAndOpenSlab() {
        SlabBuilder builder = new SlabBuilder(0, 500.0, 550.0);
        int peptideId = builder.addPeptide(510.0);
        builder.addFragment(peptideId, 10, true);
        Slab built = builder.finish();

        DirectStore store = new DirectStore(2);
        store.putSlab(0, built);

        Slab fetched = store.openSlab(0);
        Assert.assertEquals(0, fetched.slabId());
        Assert.assertEquals(1, fetched.peptideCount());
        int[] expectedPeptides = {0};
        Assert.assertArrayEquals(expectedPeptides, fetched.peptidesInBucket(10));
    }

    /** A slot that was never filled reads back as {@code null}. */
    @Test
    public void openUnsetSlabReturnsNull() {
        DirectStore emptyStore = new DirectStore(2);
        Slab missing = emptyStore.openSlab(1);
        Assert.assertNull(missing);
    }
}
55 changes: 55 additions & 0 deletions src/test/java/edu/ucsd/msjava/fragindex/TestEliasFano.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package edu.ucsd.msjava.fragindex;

import org.junit.Assert;
import org.junit.Test;

/** Unit tests for the {@link EliasFano} codec: array round-trips and cursor iteration. */
public class TestEliasFano {

    /** Encodes then decodes {@code values}, returning the decoded copy. */
    private static int[] roundTrip(int[] values) {
        byte[] bytes = EliasFano.encode(values);
        return EliasFano.decode(bytes);
    }

    /** An empty list encodes to a non-null array and decodes back to length zero. */
    @Test
    public void emptyListRoundTrip() {
        byte[] bytes = EliasFano.encode(new int[0]);
        Assert.assertNotNull(bytes);
        Assert.assertEquals(0, EliasFano.decode(bytes).length);
    }

    /** A one-element list survives a round trip. */
    @Test
    public void singleValueRoundTrip() {
        int[] expected = {42};
        Assert.assertArrayEquals(expected, roundTrip(expected));
    }

    /** A non-decreasing list with a repeated value survives a round trip. */
    @Test
    public void monotonicListRoundTrip() {
        int[] expected = {0, 1, 5, 12, 12, 18, 31, 47};
        Assert.assertArrayEquals(expected, roundTrip(expected));
    }

    /** A 1000-element list with stride 53 survives a round trip. */
    @Test
    public void largeRangeRoundTrip() {
        int[] expected = new int[1000];
        for (int k = 0; k < expected.length; k++) {
            expected[k] = k * 53;
        }
        Assert.assertArrayEquals(expected, roundTrip(expected));
    }

    /** The cursor yields exactly the encoded values, in order, and then stops. */
    @Test
    public void iteratorMatchesArray() {
        int[] expected = {2, 3, 5, 7, 11, 13, 17, 19};
        byte[] bytes = EliasFano.encode(expected);
        int consumed = 0;
        for (EliasFano.Cursor cursor = EliasFano.open(bytes); cursor.hasNext(); consumed++) {
            Assert.assertEquals(expected[consumed], cursor.next());
        }
        Assert.assertEquals(expected.length, consumed);
    }

    /** A cursor over an empty list reports no elements. */
    @Test
    public void iteratorOnEmpty() {
        byte[] bytes = EliasFano.encode(new int[0]);
        EliasFano.Cursor cursor = EliasFano.open(bytes);
        Assert.assertFalse(cursor.hasNext());
    }
}
Loading
Loading