pSpitzner · semohr · Jun 10, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/backend/alembic/versions/2026_04_12_2038-f06e470b3d1e_match.py b/backend/alembic/versions/2026_04_12_2038-f06e470b3d1e_match.py
@@ -215,13 +215,13 @@ def migrate_data():
             # We depend on our mappers here and hope they do not change in the future
             db_match: Any
             if isinstance(beets_match, AlbumMatchStub):
-                db_match = AlbumMatchMapper().from_beets(
+                db_match = AlbumMatchMapper().to_db(
                     beets_match,  # type: ignore[arg-type]
                     Context(),
                 )
 
             else:
-                db_match = TrackMatchMapper().from_beets(
+                db_match = TrackMatchMapper().to_db(
                     beets_match,  # type: ignore[arg-type]
                     Context(),
                 )

diff --git a/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py b/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py
@@ -0,0 +1,140 @@
+"""Added item reference to TrackMatch
+
+Revision ID: 25649aa3ba78
+Revises: f06e470b3d1e
+Create Date: 2026-05-20 21:20:11.140311
+
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from beets_flask.logger import logging
+from alembic import op
+
+log = logging.getLogger("alembic.runtime.migration")
+
+
+# revision identifiers, used by Alembic.
+revision: str = "25649aa3ba78"
+down_revision: str | Sequence[str] | None = "f06e470b3d1e"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Upgrade schema.
+
+    Adds ``matches_track.item_id`` as a foreign key to ``items.id``, drops
+    ``candidate.mapping`` and finally collapses duplicate ``items`` rows.
+    """
+    with op.batch_alter_table("matches_track") as batch_op:
+        # NOTE(review): item_id is NOT NULL without a server default, so this
+        # presumably fails when matches_track already holds rows -- confirm
+        # the table is empty at this revision or backfill item_id first.
+        batch_op.add_column(sa.Column("item_id", sa.String(), nullable=False))
+        batch_op.create_foreign_key(
+            "fk_matches_track_items", "items", ["item_id"], ["id"]
+        )
+    with op.batch_alter_table("candidate") as batch_op:
+        batch_op.drop_column("mapping")
+
+    dedup_items()
+
+
+def downgrade() -> None:
+    """Downgrade schema.
+
+    Removes ``matches_track.item_id`` together with its foreign key. The
+    ``candidate.mapping`` column dropped by ``upgrade`` is not restored, and
+    deduplicated ``items`` rows are not brought back.
+    """
+    # Mirror upgrade(): SQLite has no ALTER TABLE ... DROP CONSTRAINT, so the
+    # foreign key and the column have to be removed in batch mode.
+    with op.batch_alter_table("matches_track") as batch_op:
+        batch_op.drop_constraint("fk_matches_track_items", type_="foreignkey")
+        batch_op.drop_column("item_id")
+
+
+def dedup_items() -> None:
+    """Collapse duplicate Item rows.
+
+    Duplicates were created when task.items and match.mapping keys were
+    separate Python objects during serialization. Keeps the oldest row per
+    (track, title), repoints all FK refs to it and deletes the other rows.
+    Rows that have neither a track nor a title are left untouched.
+    """
+    conn = op.get_bind()
+
+    items = conn.execute(
+        sa.text("""
+            SELECT id,
+                   json_extract(fixed_values, '$.track') AS track,
+                   json_extract(fixed_values, '$.title') AS title
+            FROM items
+            ORDER BY created_at ASC
+        """)
+    ).fetchall()
+
+    # NOTE(review): the key is global, so rows of different albums sharing
+    # track and title are merged as well -- confirm this is intended.
+    seen: dict[tuple, str] = {}  # (track, title) -> canonical_id
+    orphan_map: dict[str, str] = {}  # orphan_id -> canonical_id
+    for row in items:
+        if row.track is None and row.title is None:
+            # Nothing identifies such a row; merging would collapse every
+            # item without track and title into a single one.
+            continue
+        key = (row.track, row.title)
+        if key in seen:
+            orphan_map[row.id] = seen[key]
+        else:
+            # Rows arrive oldest first, so the first id per key is kept.
+            seen[key] = row.id
+
+    if not orphan_map:
+        log.info("No duplicate Item rows found")
+        return
+
+    total = len(orphan_map)
+    log.info("Deduping %d duplicate Item rows", total)
+
+    # Every pair occurs three times in an UPDATE below (twice in the CASE,
+    # once in the IN list) and the driver may bind each occurrence as its own
+    # variable. SQLite builds before 3.32.0 allow 999 variables per statement,
+    # so 300 pairs (at most 900 variables) stay under that limit.
+    CHUNK = 300
+    pairs = list(orphan_map.items())
+    for start in range(0, total, CHUNK):
+        chunk = pairs[start : start + CHUNK]
+        if start > 0:
+            log.info("Deduping items %d / %d", start, total)
+
+        # Only placeholder names are interpolated into the SQL text; the ids
+        # themselves are always passed as bound parameters.
+        orphan_params: dict[str, str] = {}
+        canonical_params: dict[str, str] = {}
+        cases = []
+        for j, (orphan_id, canonical_id) in enumerate(chunk):
+            orphan_params[f"o{j}"] = orphan_id
+            canonical_params[f"c{j}"] = canonical_id
+            cases.append(f"WHEN :o{j} THEN :c{j}")
+        case_expr = " ".join(cases)
+        in_list = ", ".join(f":{name}" for name in orphan_params)
+
+        # Repoint both referencing tables before the orphans are deleted.
+        for table in ("tasks_items", "album_match_track_mappings"):
+            conn.execute(
+                sa.text(
+                    f"UPDATE {table} SET item_id = CASE item_id {case_expr} "
+                    f"END WHERE item_id IN ({in_list})"
+                ),
+                orphan_params | canonical_params,
+            )
+        conn.execute(
+            sa.text(f"DELETE FROM items WHERE id IN ({in_list})"),
+            orphan_params,
+        )
+
+    log.info("Deduped %d duplicate Item rows", total)
diff --git a/backend/beets_flask/database/mapper/base.py b/backend/beets_flask/database/mapper/base.py
@@ -17,47 +17,57 @@ def __init__(self):
         self.to_cache: dict[int, Any] = {}
 
 
-class BeetsMapper(Protocol[B, M]):
+class DBMapper(Protocol[B, M]):
     """Protocol for bidirectional mapping between Beets objects and models.
 
     This mapper provides cached conversion in both directions:
-    - Beets → Model via `from_beets`
-    - Model → Beets via `to_beets`
+    - Beets|LiveState → Model via `to_db`
+    - Model → Beets|LiveState via `from_db`
 
     Identity-based caching (via `id()`) ensures:
     - stable object graphs during recursive mapping
     - prevention of infinite recursion
     - consistent reuse of already-mapped instances
 
     Subclasses must implement:
-    - `_from_beets`
-    - `_to_beets`
+    - `_to_db`
+    - `_from_db`
+
+    This solves the following problem:
+    Consider deserializing a Task with Candidates C1 and C2, where
+    C1 and C2 hold references to the Task and vice versa:
+    - C1(ref to Task)
+    - C2(ref to Task)
+    - Task(C1, C2)
+    We don't want to create copies of these objects, only shared references!
+    The mapper avoids drilling and thinking about this more than necessary :)
     """
 
-    def from_beets(self, obj: B, ctx: Context) -> M:
+    def to_db(self, obj: B, ctx: Context) -> M:
         """Convert a Beets object into a model instance with caching."""
         key = id(obj)
-        if key in ctx.from_cache:
-            return ctx.from_cache[key]
+        if key in ctx.to_cache:
+            return ctx.to_cache[key]
 
-        result = self._from_beets(obj, ctx)
-        ctx.from_cache[key] = result
-        return result
+        model = self._to_db(obj, ctx)
+        ctx.to_cache[key] = model
+        return model
 
-    def to_beets(self, model: M, ctx: Context) -> B:
+    def from_db(self, model: M, ctx: Context) -> B:
         """Convert a model instance back into a Beets object with caching."""
         key = id(model)
-        if key in ctx.to_cache:
-            return ctx.to_cache[key]
+        if key in ctx.from_cache:
+            return ctx.from_cache[key]
 
-        result = self._to_beets(model, ctx)
-        ctx.to_cache[key] = result
-        return result
+        # Backward-compatible single-phase path: cached only once _from_db returns
+        obj = self._from_db(model, ctx)
+        ctx.from_cache[key] = obj
+        return obj
 
-    def _from_beets(self, obj: B, ctx: Context) -> M:
+    def _to_db(self, obj: B, ctx: Context) -> M:
         """Implement Beets → model conversion."""
         raise NotImplementedError
 
-    def _to_beets(self, model: M, ctx: Context) -> B:
+    def _from_db(self, model: M, ctx: Context) -> B:
         """Implement model → Beets conversion."""
         raise NotImplementedError