From 6fba11c307ad509ba538f4b2ad184637f5aba563 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Wed, 20 May 2026 20:54:28 +0200 Subject: [PATCH 01/12] Incremented beets version to 2.8.0 --- backend/pyproject.toml | 3 +-- backend/uv.lock | 24 ++++-------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 30a69e0f..6c169408 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -15,7 +15,7 @@ classifiers = [ dependencies = [ "quart>=0.20.0", "confuse>=2.0.1", - "beets==2.7.1", + "beets==2.8.0", "sqlalchemy>=2.0.35", "rq>=2.0.0", "watchdog>=5.0.3", @@ -79,7 +79,6 @@ typed = [ "types-Deprecated", "types-aiofiles", "types-pyyaml", - "pandas-stubs", ] [build-system] diff --git a/backend/uv.lock b/backend/uv.lock index b47cdc30..d7ca86c0 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -203,7 +203,7 @@ wheels = [ [[package]] name = "beets" -version = "2.7.1" +version = "2.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -221,9 +221,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "unidecode" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ff/9d/4f72fc5f772ca958f48263ca688bc60752bc7d752a248d6191c62e3bafb1/beets-2.7.1.tar.gz", hash = "sha256:95f20c4087be2f4b2ab400be07cbea380bec2720e2a83180b025a972ee94bd22", size = 2189644, upload-time = "2026-03-08T08:31:30.568Z" } +sdist = { url = "https://files.pythonhosted.org/packages/01/69/cfb8188520f2b50988e71a2c3d451874b819395b01fc63fa583d14311e8b/beets-2.8.0.tar.gz", hash = "sha256:5db90eddffe640e42a76e5207adc0993d2ce3d96e6a64ce3f2bfbd12d7b3a3cf", size = 2195951, upload-time = "2026-03-28T13:11:31.076Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/3b/1ad6e0c1f3e3d3e6113634ec97f372b00f1caad5ead91b9744358964b4e2/beets-2.7.1-py3-none-any.whl", hash = "sha256:bbe5a6b29aa55f520e635ef9d4222d8f3d53431bd0d78c68ee5c8bb0f80426f1", size = 610663, upload-time = "2026-03-08T08:31:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/a7/8f/8cc7713b92842ec487d9801631cd61cb2e535f7144c2eb4a0bf06ad5f1f8/beets-2.8.0-py3-none-any.whl", hash = "sha256:10a4e19c6205d54060557b4ade82046cb9604f2c229f35d7a6b49074475383c0", size = 615487, upload-time = "2026-03-28T13:11:28.921Z" }, ] [[package]] @@ -265,7 +265,6 @@ dev = [ { name = "mypy" }, { name = "myst-nb" }, { name = "myst-parser" }, - { name = "pandas-stubs" }, { name = "paracelsus" }, { name = "pre-commit" }, { name = "pytest" }, @@ -304,7 +303,6 @@ test = [ ] typed = [ { name = "mypy" }, - { name = "pandas-stubs" }, { name = "types-aiofiles" }, { name = "types-cachetools" }, { name = "types-deprecated" }, @@ -317,7 +315,7 @@ requires-dist = [ { name = "aiofiles" }, { name = "aiohttp" }, { name = "alembic", specifier = ">=1.18.4" }, - { name = "beets", specifier = "==2.7.1" }, + { name = "beets", specifier = "==2.8.0" }, { name = "cachetools", specifier = ">=5.3.3" }, { name = "confuse", specifier = ">=2.0.1" }, { name = "deprecated", specifier = ">=1.2.18" }, @@ -348,7 +346,6 @@ dev = [ { name = "mypy", specifier = ">=1.14.1" }, { name = "myst-nb", specifier = ">=1.1.2" }, { name = "myst-parser", specifier = ">=4.0.0" }, - { name = "pandas-stubs" }, { name = "paracelsus", specifier = ">=0.15.0" }, { name = "pre-commit", specifier = ">=3.8.0" }, { name = "pytest", specifier = ">=8.2.2" }, @@ -387,7 +384,6 @@ test = [ ] typed = [ { name = "mypy", specifier = ">=1.14.1" }, - { name = "pandas-stubs" }, { name = "types-aiofiles" }, { name = "types-cachetools" }, { name = "types-deprecated" }, @@ -1439,18 +1435,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" }, ] -[[package]] -name = "pandas-stubs" -version = "3.0.0.260204" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/27/1d/297ff2c7ea50a768a2247621d6451abb2a07c0e9be7ca6d36ebe371658e5/pandas_stubs-3.0.0.260204.tar.gz", hash = "sha256:bf9294b76352effcffa9cb85edf0bed1339a7ec0c30b8e1ac3d66b4228f1fbc3", size = 109383, upload-time = "2026-02-04T15:17:17.247Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/2f/f91e4eee21585ff548e83358332d5632ee49f6b2dcd96cb5dca4e0468951/pandas_stubs-3.0.0.260204-py3-none-any.whl", hash = "sha256:5ab9e4d55a6e2752e9720828564af40d48c4f709e6a2c69b743014a6fcb6c241", size = 168540, upload-time = "2026-02-04T15:17:15.615Z" }, -] - [[package]] name = "paracelsus" version = "0.15.0" From 467111eaa8c181695490f6bf6e920d1a5616c4f4 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Wed, 20 May 2026 21:11:07 +0200 Subject: [PATCH 02/12] Added new item reference in TrackMatch to our database mapper logic. Beets since v2.8.0 directly references the items in the matches, lets do the same. --- backend/beets_flask/database/mapper/match.py | 3 +++ backend/beets_flask/database/models/match.py | 5 +++++ backend/beets_flask/database/models/states.py | 12 +++++++--- .../unit/test_database/mapper/test_match.py | 22 ++++++++++++++++--- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/backend/beets_flask/database/mapper/match.py b/backend/beets_flask/database/mapper/match.py index 6e222abc..7bcb64c9 100644 --- a/backend/beets_flask/database/mapper/match.py +++ b/backend/beets_flask/database/mapper/match.py @@ -138,17 +138,20 @@ class TrackMatchMapper(BeetsMapper[BeetsTrackMatch, TrackMatch]): def __init__(self): self.track_info_mapper = TrackInfoMapper() self.distance_mapper = DistanceMapper() + self.item_mapper = ItemMapper() def _from_beets(self, obj: BeetsTrackMatch, ctx: Context) -> TrackMatch: return TrackMatch( info=self.track_info_mapper.from_beets(obj.info, ctx), distance=self.distance_mapper.from_beets(obj.distance, ctx), + item=self.item_mapper.from_beets(obj.item, ctx), ) def _to_beets(self, model: TrackMatch, ctx: Context) -> BeetsTrackMatch: return BeetsTrackMatch( info=self.track_info_mapper.to_beets(model.info, ctx), distance=self.distance_mapper.to_beets(model.distance, ctx), + item=self.item_mapper.to_beets(model.item, ctx), ) diff --git a/backend/beets_flask/database/models/match.py b/backend/beets_flask/database/models/match.py index 49c8398a..f567aa6f 100644 --- a/backend/beets_flask/database/models/match.py +++ b/backend/beets_flask/database/models/match.py @@ -174,7 +174,10 @@ class TrackMatch(Match): id: Mapped[str] = mapped_column(ForeignKey("matches.id"), primary_key=True) info_id: Mapped[str] = mapped_column(ForeignKey("track_info.id")) + item_id: Mapped[str] = mapped_column(ForeignKey("items.id")) + info: Mapped[TrackInfo] = relationship() + item: Mapped[Item] = relationship() __mapper_args__ = { "polymorphic_identity": "track", @@ -184,10 +187,12 @@ def __init__( self, info: TrackInfo, distance: Distance, + item: Item, id: str | None = None, ) -> None: self.info = info self.distance = distance + self.item = item super().__init__(id) diff --git a/backend/beets_flask/database/models/states.py b/backend/beets_flask/database/models/states.py index 82bff105..22935d38 100644 --- a/backend/beets_flask/database/models/states.py +++ b/backend/beets_flask/database/models/states.py @@ -429,7 +429,8 @@ def from_live_state(cls, state: TaskState) -> TaskStateInDb: TaskItem(item=mapper.from_beets(item, ctx)) for item in state.items ], candidates=[ - CandidateStateInDb.from_live_state(c) for c in state.candidate_states + CandidateStateInDb.from_live_state(c, ctx) + for c in state.candidate_states ], chosen_candidate_id=state.chosen_candidate_state_id, progress=state.progress.progress, @@ -499,6 +500,10 @@ class CandidateStateInDb(Base): duplicate_ids: Mapped[str] # association between tracks online and items on disk, from int to int + # TODO: !! + # We should be able to remove this as there now is an + # direct item linkage + # !! mapping: Mapped[dict[int, int]] def __init__( @@ -515,11 +520,12 @@ def __init__( self.mapping = mapping @classmethod - def from_live_state(cls, state: CandidateState) -> CandidateStateInDb: + def from_live_state(cls, state: CandidateState, ctx: Context) -> CandidateStateInDb: """Create the DB representation of a live CandidateState.""" + return cls( id=state.id, - match=MatchMapper().from_beets(state.match, Context()), + match=MatchMapper().from_beets(state.match, ctx), duplicate_ids=state.duplicate_ids, mapping=state._mapping, ) diff --git a/backend/tests/unit/test_database/mapper/test_match.py b/backend/tests/unit/test_database/mapper/test_match.py index edc1565e..0f2abe63 100644 --- a/backend/tests/unit/test_database/mapper/test_match.py +++ b/backend/tests/unit/test_database/mapper/test_match.py @@ -15,6 +15,7 @@ TrackMatchMapper, ) from beets_flask.database.models.match import TrackInfo +from beets_flask.importer.types import BeetsItem from tests.conftest import beets_lib_item @@ -304,7 +305,15 @@ def test_roundtrip_conversion(self): length=180.0, index=1, ) - original = BeetsTrackMatch(distance=track_distance, info=beets_track1) + beets_item = BeetsItem( + title="Test Item 1", + ) + + original = BeetsTrackMatch( + distance=track_distance, + info=beets_track1, + item=beets_item, + ) mapper = TrackMatchMapper() ctx = Context() @@ -316,6 +325,7 @@ def test_roundtrip_conversion(self): assert model.info.data["artist"] == "Test Artist" assert model.info.data["length"] == 180.0 assert model.distance.raw_distance == track_distance.raw_distance + assert model.item.fixed_values["title"] == beets_item.title assert len(model.distance.penalties) == 2 # Test to_beets @@ -325,6 +335,7 @@ def test_roundtrip_conversion(self): assert result.info.artist == beets_track1.artist assert result.info.length == beets_track1.length assert result.distance.raw_distance == original.distance.raw_distance + assert result.item.title == beets_item.title # Verify penalties are preserved penalty_keys = {p.key for p in model.distance.penalties} @@ -356,9 +367,13 @@ def test_roundtrip_track_match(self): track_distance = BeetsDistance() track_distance.add("artist", 0.1) - beets_track = BeetsTrackInfo(title="Test Track") - beets_track_match = BeetsTrackMatch(distance=track_distance, info=beets_track) + beets_item = BeetsItem(title="Test Item 1") + beets_track_match = BeetsTrackMatch( + distance=track_distance, + info=beets_track, + item=beets_item, + ) mapper = MatchMapper() ctx = Context() @@ -369,3 +384,4 @@ def test_roundtrip_track_match(self): assert isinstance(result, BeetsTrackMatch) assert result.info.title == "Test Track" assert result.distance.raw_distance == track_distance.raw_distance + assert result.item.title == beets_item.title From fe2924dc3071bd9ac2255999338f1189dfe262f6 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Wed, 20 May 2026 21:30:46 +0200 Subject: [PATCH 03/12] Added alembic migration --- ...ba78_added_item_reference_to_trackmatch.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py diff --git a/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py b/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py new file mode 100644 index 00000000..4d5df24a --- /dev/null +++ b/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py @@ -0,0 +1,36 @@ +"""Added item reference to TrackMatch + +Revision ID: 25649aa3ba78 +Revises: f06e470b3d1e +Create Date: 2026-05-20 21:20:11.140311 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "25649aa3ba78" +down_revision: str | Sequence[str] | None = "f06e470b3d1e" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("matches_track", sa.Column("item_id", sa.String(), nullable=False)) + op.create_foreign_key(None, "matches_track", "items", ["item_id"], ["id"]) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint(None, "matches_track", type_="foreignkey") + op.drop_column("matches_track", "item_id") + # ### end Alembic commands ### From b2be98a6746a50aaf838c25285d226cc0a63de00 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Wed, 20 May 2026 21:31:01 +0200 Subject: [PATCH 04/12] Fixed typing errors as beets 2.8.0 is a bit stricter --- backend/beets_flask/importer/session.py | 4 ++-- backend/beets_flask/importer/states.py | 10 ++++++---- backend/tests/unit/test_importer/test_states.py | 4 +++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/backend/beets_flask/importer/session.py b/backend/beets_flask/importer/session.py index 3387f281..1633f582 100644 --- a/backend/beets_flask/importer/session.py +++ b/backend/beets_flask/importer/session.py @@ -500,7 +500,7 @@ def lookup_candidates(self, task: BeetsImportTask): task.lookup_candidates(search_ids) - if len(task.candidates) == 0: + if not task.candidates or len(task.candidates) == 0: raise NoCandidatesFoundException(persist_in_db=True) # Update our state @@ -984,7 +984,7 @@ def match_threshold(self, task: BeetsImportTask): except (AttributeError, TypeError): distance = 2.0 - if len(task.candidates) == 0: + if not task.candidates or len(task.candidates) == 0: raise NoCandidatesFoundException() if distance > self.import_threshold: diff --git a/backend/beets_flask/importer/states.py b/backend/beets_flask/importer/states.py index 6b55cc6c..6bebbef3 100644 --- a/backend/beets_flask/importer/states.py +++ b/backend/beets_flask/importer/states.py @@ -259,7 +259,9 @@ def __init__( # we might run into inconsistencies here, if candidates of the task # change. but I do not know when or why they would. self.task = task - self.candidate_states = [CandidateState(c, self) for c in self.task.candidates] + self.candidate_states = [ + CandidateState(c, self) for c in (self.task.candidates or []) + ] self.progress = ProgressState() def __repr__(self) -> str: @@ -278,7 +280,7 @@ def candidates( self, ) -> Sequence[BeetsAlbumMatch | BeetsTrackMatch]: """Task candidates, i.e. possible matches to choose from.""" - return self.task.candidates + return self.task.candidates or [] @property def asis_candidate_id(self) -> str: @@ -296,11 +298,11 @@ def add_candidates( insert_at: int = 0, ) -> list[CandidateState]: """Add new candidates to the selection state.""" - if len(self.task.candidates) == 0 or len(self.candidate_states) == 0: + if len(self.candidates) == 0 or len(self.candidate_states) == 0: insert_at = 0 # task.candidates is a sequence and thus immutable - _ = list(self.task.candidates) + _ = list(self.candidates) _[insert_at:insert_at] = candidates self.task.candidates = _ diff --git a/backend/tests/unit/test_importer/test_states.py b/backend/tests/unit/test_importer/test_states.py index dad53b57..f28f2282 100644 --- a/backend/tests/unit/test_importer/test_states.py +++ b/backend/tests/unit/test_importer/test_states.py @@ -67,11 +67,12 @@ def test_properties(self): assert task_state.items == [self.task.items[0]] assert task_state.progress == Progress.NOT_STARTED - assert len(task_state.candidate_states) == len(self.task.candidates) + assert len(task_state.candidate_states) == len(self.task.candidates or []) def test_best_candidate(self): task_state = self.task_state assert task_state.best_candidate_state is not None + assert self.task.candidates is not None assert task_state.best_candidate_state.match is self.task.candidates[0] self.task.candidates = [] @@ -115,6 +116,7 @@ def test_properties(self): assert isinstance(candidate, CandidateState) assert candidate.id is not None + assert task.candidates is not None assert candidate.match == task.candidates[0] assert candidate.task_state == self.task_state assert candidate.type == "album" From 1aee19406166c3bfc08286537327ca3dee795d7b Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Fri, 29 May 2026 23:35:44 +0200 Subject: [PATCH 05/12] Fixed issue with event beeing fired on de-serialization. --- backend/beets_flask/database/mapper/match.py | 23 +++++++++++++++++++- backend/beets_flask/importer/states.py | 6 ++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/backend/beets_flask/database/mapper/match.py b/backend/beets_flask/database/mapper/match.py index 7bcb64c9..ae1391d1 100644 --- a/backend/beets_flask/database/mapper/match.py +++ b/backend/beets_flask/database/mapper/match.py @@ -217,7 +217,7 @@ def _to_beets(self, model: AlbumMatch, ctx: Context) -> BeetsAlbumMatch: elif tm.item is not None: extra_items.append(self.item_mapper.to_beets(tm.item, ctx)) - return BeetsAlbumMatch( + return self.create_without_event( distance=self.distance_mapper.to_beets(model.distance, ctx), info=self.album_info_mapper.to_beets(model.info, ctx), mapping=mapping, @@ -225,6 +225,27 @@ def _to_beets(self, model: AlbumMatch, ctx: Context) -> BeetsAlbumMatch: extra_tracks=extra_tracks, ) + @staticmethod + def create_without_event( + distance: BeetsDistance, + info: BeetsAlbumInfo, + mapping: dict[BeetsItem, BeetsTrackInfo], + extra_items: list[BeetsItem], + extra_tracks: list[BeetsTrackInfo], + ) -> BeetsAlbumMatch: + """Workaround to not fire 'albumn_matched' event from deserialization. + + see https://github.com/beetbox/beets/pull/6184/changes#r2554116434 + """ + match = object.__new__(BeetsAlbumMatch) + match.distance = distance + match.info = info + match.mapping = mapping + match.extra_items = extra_items + match.extra_tracks = extra_tracks + + return match + class ItemMapper(BeetsMapper[BeetsItem, Item]): def _to_beets(self, model: Item, ctx) -> BeetsItem: diff --git a/backend/beets_flask/importer/states.py b/backend/beets_flask/importer/states.py index 6bebbef3..118ee693 100644 --- a/backend/beets_flask/importer/states.py +++ b/backend/beets_flask/importer/states.py @@ -529,7 +529,10 @@ def _generate_kwargs(item): tracks = [BeetsTrackInfo(**_generate_kwargs(i)) for i in items] - match = BeetsAlbumMatch( + # This is a hacky workaround to not trigger the event for album creation... + from beets_flask.database.mapper.match import AlbumMatchMapper + + match = AlbumMatchMapper.create_without_event( distance=BeetsDistance(), info=BeetsAlbumInfo( tracks=tracks, @@ -539,6 +542,7 @@ def _generate_kwargs(item): extra_tracks=[], mapping={i: tracks[idx] for idx, i in enumerate(items)}, ) + candidate = cls(match=match, task_state=task_state) candidate.id = task_state.asis_candidate_id # As the asis candidate state is not maintained we not to From cfd04f851beab1969d38b5ed1061be1bf4c76411 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Fri, 29 May 2026 23:36:05 +0200 Subject: [PATCH 06/12] Use global folder per beets version for test caching. --- backend/tests/conftest.py | 105 +++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 48 deletions(-) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 88c97d87..5340f8ec 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -3,7 +3,6 @@ import os import pickle import shutil -import tempfile from collections.abc import Callable, Generator from contextlib import _GeneratorContextManager from pathlib import Path @@ -212,58 +211,68 @@ def local_redis(monkeypatch): monkeypatch.undo() -lookup_cache_dir: Path +@pytest.fixture(scope="session", autouse=True) +def mock_tag_album(): + """Fixture that monkeypatches beets tag_album to use cached lookups. + Caches MusicBrainz lookups in ``/tmp/beets_lookup_cache_/`` so + they survive across test runs but are keyed by beets version. -@pytest.fixture(scope="module", autouse=True) -def mock_tag_album(): - """Fixture that monkeypatches beets tag_album to use cached lookups.""" - # Create temp lookup cache directory once per module - global lookup_cache_dir - - lookup_cache_dir = Path(tempfile.mkdtemp(prefix="beets_lookup_cache_")) - - original_tag_album = autotag.tag_album - autotag.tag_album = tag_album - yield lookup_cache_dir - autotag.tag_album = original_tag_album - - -def tag_album( - items, - search_artist: str | None = None, - search_name: str | None = None, - search_ids: list[str] = [], -): - global lookup_cache_dir - # Compute items hash based on the items - m = hashlib.md5() - for item in items: - m.update(item.path) - if search_artist: - m.update(search_artist.encode("utf-8")) - if search_name: - m.update(search_name.encode("utf-8")) - for search_id in search_ids: - m.update(search_id.encode("utf-8")) - items_hash = m.hexdigest()[:8] - - cache_file = lookup_cache_dir / f"lookup_{items_hash}.pickle" - if cache_file.exists(): - log.debug(f"Using cached lookup from temp dir {cache_file}") - with open(cache_file, "rb") as f: - return pickle.load(f) - else: - # TODO: This pickle contains absolute paths to the files - # while undesired (no use in having them in the git repo) its for now the - # easiest way... and we hope music brainz does not change its data too often! + We must patch both ``beets.autotag.tag_album`` (the public API) and + ``beets.importer.tasks.tag_album`` (where the actual call site lives, + imported directly from ``beets.autotag.match``, bypassing the public API). + """ + import beets + import beets.importer.tasks as tasks_mod + + # Persistent cache in /tmp, keyed by beets version + cache_dir = Path(f"/tmp/beets_lookup_cache_{beets.__version__}") + cache_dir.mkdir(parents=True, exist_ok=True) + + # Save originals from both patched locations. + # beeets.importer.tasks imports tag_album directly from + # beets.autotag.match (not via beets.autotag), so we must + # patch that module's attribute too. Use setattr to avoid + # pyright reportPrivateImportUsage on the re-imported name. + _original_autotag = autotag.tag_album + _original_tasks = getattr(tasks_mod, "tag_album") + + def _cached_tag_album( + items, + search_artist: str | None = None, + search_name: str | None = None, + search_ids: list[str] = [], + ): + # Compute stable hash from items and search parameters + m = hashlib.md5() + for item in items: + m.update(item.path) + if search_artist: + m.update(search_artist.encode("utf-8")) + if search_name: + m.update(search_name.encode("utf-8")) + for search_id in search_ids: + m.update(search_id.encode("utf-8")) + items_hash = m.hexdigest()[:8] + + cache_file = cache_dir / f"lookup_{items_hash}.pickle" + if cache_file.exists(): + log.debug(f"Using cached lookup from {cache_file}") + with open(cache_file, "rb") as f: + return pickle.load(f) + + # Real lookup on cache miss res = _tag_album(items, search_artist, search_name, search_ids) - - cache_file.parent.mkdir(parents=True, exist_ok=True) with open(cache_file, "wb") as f: pickle.dump(res, f) - return res + # Patch both locations so the wrapper intercepts all call paths + autotag.tag_album = _cached_tag_album + setattr(tasks_mod, "tag_album", _cached_tag_album) + + yield cache_dir -autotag.tag_album = tag_album + # Restore originals + autotag.tag_album = _original_autotag + setattr(tasks_mod, "tag_album", _original_tasks) From 19a37f7c4856fbe3e7b8e57f1000ca729e812122 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Thu, 4 Jun 2026 22:56:04 +0200 Subject: [PATCH 07/12] Renamed mapper functions from_beets -> to_db, to_beets -> from_db. --- .../2026_04_12_2038-f06e470b3d1e_match.py | 4 +- backend/beets_flask/database/mapper/base.py | 39 ++++--- backend/beets_flask/database/mapper/match.py | 106 +++++++++--------- backend/beets_flask/database/models/states.py | 8 +- .../unit/test_database/mapper/test_match.py | 28 ++--- 5 files changed, 95 insertions(+), 90 deletions(-) diff --git a/backend/alembic/versions/2026_04_12_2038-f06e470b3d1e_match.py b/backend/alembic/versions/2026_04_12_2038-f06e470b3d1e_match.py index 93f0e7d9..c13dfe8f 100644 --- a/backend/alembic/versions/2026_04_12_2038-f06e470b3d1e_match.py +++ b/backend/alembic/versions/2026_04_12_2038-f06e470b3d1e_match.py @@ -215,13 +215,13 @@ def migrate_data(): # We depend on our mappers here and hope they do not change in the future db_match: Any if isinstance(beets_match, AlbumMatchStub): - db_match = AlbumMatchMapper().from_beets( + db_match = AlbumMatchMapper().to_db( beets_match, # type: ignore[arg-type] Context(), ) else: - db_match = TrackMatchMapper().from_beets( + db_match = TrackMatchMapper().to_db( beets_match, # type: ignore[arg-type] Context(), ) diff --git a/backend/beets_flask/database/mapper/base.py b/backend/beets_flask/database/mapper/base.py index 23dfe10a..423c34ac 100644 --- a/backend/beets_flask/database/mapper/base.py +++ b/backend/beets_flask/database/mapper/base.py @@ -17,12 +17,12 @@ def __init__(self): self.to_cache: dict[int, Any] = {} -class BeetsMapper(Protocol[B, M]): +class DBMapper(Protocol[B, M]): """Protocol for bidirectional mapping between Beets objects and models. This mapper provides cached conversion in both directions: - - Beets → Model via `from_beets` - - Model → Beets via `to_beets` + - Beets|LiveState → Model via `to_db` + - Model → Beets|LiveState via `from_db` Identity-based caching (via `id()`) ensures: - stable object graphs during recursive mapping @@ -30,34 +30,43 @@ class BeetsMapper(Protocol[B, M]): - consistent reuse of already-mapped instances Subclasses must implement: - - `_from_beets` - - `_to_beets` + - `_to_db` + - `_from_db` + + This solves the following problem: + Consider we want to deserialize a Task with Candidates C1 and C2, where + C1 and C2 hold references to the task and vice versa. + - C1(ref to Task) + - C2(ref to Task) + - Task(C1,C2) + We dont want to create copies of the objects, references only! + The mapper avoids drilling and thinking about this more than necessary :) """ - def from_beets(self, obj: B, ctx: Context) -> M: + def to_db(self, obj: B, ctx: Context) -> M: """Convert a Beets object into a model instance with caching.""" key = id(obj) if key in ctx.from_cache: return ctx.from_cache[key] - result = self._from_beets(obj, ctx) - ctx.from_cache[key] = result - return result + model = self._to_db(obj, ctx) + ctx.from_cache[key] = model + return model - def to_beets(self, model: M, ctx: Context) -> B: + def from_db(self, model: M, ctx: Context) -> B: """Convert a model instance back into a Beets object with caching.""" key = id(model) if key in ctx.to_cache: return ctx.to_cache[key] - result = self._to_beets(model, ctx) - ctx.to_cache[key] = result - return result + obj = self._from_db(model, ctx) + ctx.to_cache[key] = obj + return obj - def _from_beets(self, obj: B, ctx: Context) -> M: + def _to_db(self, obj: B, ctx: Context) -> M: """Implement Beets → model conversion.""" raise NotImplementedError - def _to_beets(self, model: M, ctx: Context) -> B: + def _from_db(self, model: M, ctx: Context) -> B: """Implement model → Beets conversion.""" raise NotImplementedError diff --git a/backend/beets_flask/database/mapper/match.py b/backend/beets_flask/database/mapper/match.py index ae1391d1..17dfe672 100644 --- a/backend/beets_flask/database/mapper/match.py +++ b/backend/beets_flask/database/mapper/match.py @@ -29,33 +29,29 @@ TrackInfo, TrackMatch, ) -from .base import BeetsMapper, Context +from .base import Context, DBMapper -class MatchMapper(BeetsMapper[BeetsAlbumMatch | BeetsTrackMatch, Match]): +class MatchMapper(DBMapper[BeetsAlbumMatch | BeetsTrackMatch, Match]): def __init__(self): self.album_mapper = AlbumMatchMapper() self.track_mapper = TrackMatchMapper() - def _from_beets( - self, obj: BeetsAlbumMatch | BeetsTrackMatch, ctx: Context - ) -> Match: + def _to_db(self, obj: BeetsAlbumMatch | BeetsTrackMatch, ctx: Context) -> Match: if isinstance(obj, BeetsAlbumMatch): - return self.album_mapper.from_beets(obj, ctx) + return self.album_mapper.to_db(obj, ctx) if isinstance(obj, BeetsTrackMatch): - return self.track_mapper.from_beets(obj, ctx) + return self.track_mapper.to_db(obj, ctx) raise TypeError(f"Unsupported beets obj type: {type(obj)}") - def _to_beets( - self, model: Match, ctx: Context - ) -> BeetsAlbumMatch | BeetsTrackMatch: + def _from_db(self, model: Match, ctx: Context) -> BeetsAlbumMatch | BeetsTrackMatch: if isinstance(model, AlbumMatch): - return self.album_mapper.to_beets(model, ctx) + return self.album_mapper.from_db(model, ctx) if isinstance(model, TrackMatch): - return self.track_mapper.to_beets(model, ctx) + return self.track_mapper.from_db(model, ctx) raise TypeError(f"Unsupported model type: {type(model)}") @@ -63,49 +59,49 @@ def _to_beets( # ----------------------------------- Info ----------------------------------- # -class TrackInfoMapper(BeetsMapper[BeetsTrackInfo, TrackInfo]): - def _from_beets(self, obj: BeetsTrackInfo, ctx: Context) -> TrackInfo: +class TrackInfoMapper(DBMapper[BeetsTrackInfo, TrackInfo]): + def _to_db(self, obj: BeetsTrackInfo, ctx: Context) -> TrackInfo: data = {k: v for k, v in obj.items() if not k.startswith("_")} model = TrackInfo(data=data) return model - def _to_beets(self, model: TrackInfo, ctx: Context) -> BeetsTrackInfo: + def _from_db(self, model: TrackInfo, ctx: Context) -> BeetsTrackInfo: beets_obj = BeetsTrackInfo(**model.data) return beets_obj -class AlbumInfoMapper(BeetsMapper[BeetsAlbumInfo, AlbumInfo]): +class AlbumInfoMapper(DBMapper[BeetsAlbumInfo, AlbumInfo]): def __init__(self): self.track_mapper = TrackInfoMapper() - def _from_beets(self, obj: BeetsAlbumInfo, ctx: Context) -> AlbumInfo: + def _to_db(self, obj: BeetsAlbumInfo, ctx: Context) -> AlbumInfo: data = {k: v for k, v in obj.items()} data.pop("tracks", None) return AlbumInfo( - tracks=[self.track_mapper.from_beets(t, ctx) for t in obj.tracks], + tracks=[self.track_mapper.to_db(t, ctx) for t in obj.tracks], data=data, ) - def _to_beets(self, model: AlbumInfo, ctx: Context) -> BeetsAlbumInfo: + def _from_db(self, model: AlbumInfo, ctx: Context) -> BeetsAlbumInfo: data = dict(model.data) data.pop("tracks", None) return BeetsAlbumInfo( - tracks=[self.track_mapper.to_beets(t, ctx) for t in model.tracks], + tracks=[self.track_mapper.from_db(t, ctx) for t in model.tracks], **data, ) -class DistanceMapper(BeetsMapper[BeetsDistance, Distance]): +class DistanceMapper(DBMapper[BeetsDistance, Distance]): def __init__(self): self.track_mapper = TrackInfoMapper() - def _from_beets(self, obj: BeetsDistance, ctx: Context) -> Distance: + def _to_db(self, obj: BeetsDistance, ctx: Context) -> Distance: penalties = [Penalty(key=k, value=v) for k, v in obj._penalties.items()] track_distances: list[Distance] = [] for beets_track_info, track_distance in obj.tracks.items(): - child = self.from_beets(track_distance, ctx) - child.track_info = self.track_mapper.from_beets(beets_track_info, ctx) + child = self.to_db(track_distance, ctx) + child.track_info = self.track_mapper.to_db(beets_track_info, ctx) track_distances.append(child) return Distance( @@ -115,7 +111,7 @@ def _from_beets(self, obj: BeetsDistance, ctx: Context) -> Distance: track_distances=track_distances, ) - def _to_beets(self, model: Distance, ctx: Context) -> BeetsDistance: + def _from_db(self, model: Distance, ctx: Context) -> BeetsDistance: distance = BeetsDistance() for penalty in model.penalties: @@ -125,8 +121,8 @@ def _to_beets(self, model: Distance, ctx: Context) -> BeetsDistance: for track_distance in model.track_distances: if track_distance.track_info is not None: distance.tracks[ - self.track_mapper.to_beets(track_distance.track_info, ctx) - ] = self.to_beets(track_distance, ctx) + self.track_mapper.from_db(track_distance.track_info, ctx) + ] = self.from_db(track_distance, ctx) return distance @@ -134,45 +130,45 @@ def _to_beets(self, model: Distance, ctx: Context) -> BeetsDistance: # ---------------------------------- Matches --------------------------------- # -class TrackMatchMapper(BeetsMapper[BeetsTrackMatch, TrackMatch]): +class TrackMatchMapper(DBMapper[BeetsTrackMatch, TrackMatch]): def __init__(self): self.track_info_mapper = TrackInfoMapper() self.distance_mapper = DistanceMapper() self.item_mapper = ItemMapper() - def _from_beets(self, obj: BeetsTrackMatch, ctx: Context) -> TrackMatch: + def _to_db(self, obj: BeetsTrackMatch, ctx: Context) -> TrackMatch: return TrackMatch( - info=self.track_info_mapper.from_beets(obj.info, ctx), - distance=self.distance_mapper.from_beets(obj.distance, ctx), - item=self.item_mapper.from_beets(obj.item, ctx), + info=self.track_info_mapper.to_db(obj.info, ctx), + distance=self.distance_mapper.to_db(obj.distance, ctx), + item=self.item_mapper.to_db(obj.item, ctx), ) - def _to_beets(self, model: TrackMatch, ctx: Context) -> BeetsTrackMatch: + def _from_db(self, model: TrackMatch, ctx: Context) -> BeetsTrackMatch: return BeetsTrackMatch( - info=self.track_info_mapper.to_beets(model.info, ctx), - distance=self.distance_mapper.to_beets(model.distance, ctx), - item=self.item_mapper.to_beets(model.item, ctx), + info=self.track_info_mapper.from_db(model.info, ctx), + distance=self.distance_mapper.from_db(model.distance, ctx), + item=self.item_mapper.from_db(model.item, ctx), ) -class AlbumMatchMapper(BeetsMapper[BeetsAlbumMatch, AlbumMatch]): +class AlbumMatchMapper(DBMapper[BeetsAlbumMatch, AlbumMatch]): def __init__(self): self.album_info_mapper = AlbumInfoMapper() self.distance_mapper = DistanceMapper() self.track_info_mapper = TrackInfoMapper() self.item_mapper = ItemMapper() - def _from_beets(self, obj: BeetsAlbumMatch, ctx: Context) -> AlbumMatch: + def _to_db(self, obj: BeetsAlbumMatch, ctx: Context) -> AlbumMatch: model = AlbumMatch( - info=self.album_info_mapper.from_beets(obj.info, ctx), - distance=self.distance_mapper.from_beets(obj.distance, ctx), + info=self.album_info_mapper.to_db(obj.info, ctx), + distance=self.distance_mapper.to_db(obj.distance, ctx), ) # extra tracks for extra_track in obj.extra_tracks: model.track_mappings.append( AlbumMatchTrackMapping( - track_info=self.track_info_mapper.from_beets(extra_track, ctx), + track_info=self.track_info_mapper.to_db(extra_track, ctx), item=None, ) ) @@ -182,7 +178,7 @@ def _from_beets(self, obj: BeetsAlbumMatch, ctx: Context) -> AlbumMatch: model.track_mappings.append( AlbumMatchTrackMapping( track_info=None, - item=self.item_mapper.from_beets(extra_item, ctx), + item=self.item_mapper.to_db(extra_item, ctx), ) ) @@ -190,14 +186,14 @@ def _from_beets(self, obj: BeetsAlbumMatch, ctx: Context) -> AlbumMatch: for item, track in obj.mapping.items(): model.track_mappings.append( AlbumMatchTrackMapping( - track_info=self.track_info_mapper.from_beets(track, ctx), - item=self.item_mapper.from_beets(item, ctx), + track_info=self.track_info_mapper.to_db(track, ctx), + item=self.item_mapper.to_db(item, ctx), ) ) return model - def _to_beets(self, model: AlbumMatch, ctx: Context) -> BeetsAlbumMatch: + def _from_db(self, model: AlbumMatch, ctx: Context) -> BeetsAlbumMatch: mapping: dict[BeetsItem, BeetsTrackInfo] = {} extra_items: list[BeetsItem] = [] extra_tracks: list[BeetsTrackInfo] = [] @@ -205,21 +201,21 @@ def _to_beets(self, model: AlbumMatch, ctx: Context) -> BeetsAlbumMatch: for tm in model.track_mappings: # pairs if tm.track_info is not None and tm.item is not None: - item = self.item_mapper.to_beets(tm.item, ctx) - track_info = self.track_info_mapper.to_beets(tm.track_info, ctx) + item = self.item_mapper.from_db(tm.item, ctx) + track_info = self.track_info_mapper.from_db(tm.track_info, ctx) mapping[item] = track_info # extra track elif tm.track_info is not None: - extra_tracks.append(self.track_info_mapper.to_beets(tm.track_info, ctx)) + extra_tracks.append(self.track_info_mapper.from_db(tm.track_info, ctx)) # extra item elif tm.item is not None: - extra_items.append(self.item_mapper.to_beets(tm.item, ctx)) + extra_items.append(self.item_mapper.from_db(tm.item, ctx)) return self.create_without_event( - distance=self.distance_mapper.to_beets(model.distance, ctx), - info=self.album_info_mapper.to_beets(model.info, ctx), + distance=self.distance_mapper.from_db(model.distance, ctx), + info=self.album_info_mapper.from_db(model.info, ctx), mapping=mapping, extra_items=extra_items, extra_tracks=extra_tracks, @@ -233,7 +229,7 @@ def create_without_event( extra_items: list[BeetsItem], extra_tracks: list[BeetsTrackInfo], ) -> BeetsAlbumMatch: - """Workaround to not fire 'albumn_matched' event from deserialization. + """Workaround to not fire 'album_matched' event from deserialization. see https://github.com/beetbox/beets/pull/6184/changes#r2554116434 """ @@ -247,14 +243,14 @@ def create_without_event( return match -class ItemMapper(BeetsMapper[BeetsItem, Item]): - def _to_beets(self, model: Item, ctx) -> BeetsItem: +class ItemMapper(DBMapper[BeetsItem, Item]): + def _from_db(self, model: Item, ctx) -> BeetsItem: return BeetsItem._awaken( fixed_values={k: self._decode(v) for k, v in model.fixed_values.items()}, flex_values={k: self._decode(v) for k, v in model.flex_values.items()}, ) - def _from_beets(self, obj: BeetsItem, ctx) -> Item: + def _to_db(self, obj: BeetsItem, ctx) -> Item: return Item( fixed_values={k: self._encode(v) for k, v in obj._values_fixed.items()}, flex_values={k: self._encode(v) for k, v in obj._values_flex.items()}, diff --git a/backend/beets_flask/database/models/states.py b/backend/beets_flask/database/models/states.py index 22935d38..675c72ba 100644 --- a/backend/beets_flask/database/models/states.py +++ b/backend/beets_flask/database/models/states.py @@ -381,7 +381,7 @@ class TaskStateInDb(Base): def items(self) -> list[BeetsItem]: ctx = Context() mapper = ItemMapper() - return [mapper.to_beets(row.item, ctx) for row in self.pending_items] + return [mapper.from_db(row.item, ctx) for row in self.pending_items] def __init__( self, @@ -426,7 +426,7 @@ def from_live_state(cls, state: TaskState) -> TaskStateInDb: toppath=str(state.toppath).encode("utf-8") if state.toppath else None, paths=state.task.paths, pending_items=[ - TaskItem(item=mapper.from_beets(item, ctx)) for item in state.items + TaskItem(item=mapper.to_db(item, ctx)) for item in state.items ], candidates=[ CandidateStateInDb.from_live_state(c, ctx) @@ -525,7 +525,7 @@ def from_live_state(cls, state: CandidateState, ctx: Context) -> CandidateStateI return cls( id=state.id, - match=MatchMapper().from_beets(state.match, ctx), + match=MatchMapper().to_db(state.match, ctx), duplicate_ids=state.duplicate_ids, mapping=state._mapping, ) @@ -535,7 +535,7 @@ def to_live_state(self, task_state: TaskState | None) -> CandidateState: if task_state is None: task_state = self.task.to_live_state() live_state = CandidateState( - MatchMapper().to_beets(self.match, Context()), + MatchMapper().from_db(self.match, ctx), task_state, mapping=self.mapping, ) diff --git a/backend/tests/unit/test_database/mapper/test_match.py b/backend/tests/unit/test_database/mapper/test_match.py index 0f2abe63..221f51ab 100644 --- a/backend/tests/unit/test_database/mapper/test_match.py +++ b/backend/tests/unit/test_database/mapper/test_match.py @@ -40,7 +40,7 @@ def test_roundtrip_conversion(self): ctx = Context() # Test from_beets - model = mapper.from_beets(original, ctx) + model = mapper.to_db(original, ctx) assert isinstance(model, TrackInfo) assert model.data["title"] == "Test Track" assert model.data["artist"] == "Test Artist" @@ -49,7 +49,7 @@ def test_roundtrip_conversion(self): assert model.data["index"] == 1 # Test to_beets - result = mapper.to_beets(model, ctx) + result = mapper.from_db(model, ctx) assert result.title == original.title assert result.artist == original.artist assert result.album == original.album @@ -80,14 +80,14 @@ def test_roundtrip_conversion(self): ctx = Context() # Test from_beets - model = mapper.from_beets(original, ctx) + model = mapper.to_db(original, ctx) assert model.data["year"] == 1 assert len(model.tracks) == 2 assert model.tracks[0].data["title"] == "a" assert model.tracks[1].data["title"] == "b" # Test to_beets - result = mapper.to_beets(model, ctx) + result = mapper.from_db(model, ctx) assert result.year == original.year assert len(result.tracks) == len(original.tracks) assert result.tracks[0].title == original.tracks[0].title @@ -112,12 +112,12 @@ def test_roundtrip_conversion(self): ctx = Context() # Test from_beets - model = mapper.from_beets(original, ctx) + model = mapper.to_db(original, ctx) assert model.max_distance == original.max_distance assert model.raw_distance == original.raw_distance # Test to_beets - result = mapper.to_beets(model, ctx) + result = mapper.from_db(model, ctx) assert result.distance == original.distance assert result.max_distance == original.max_distance assert result.raw_distance == original.raw_distance @@ -260,7 +260,7 @@ def test_roundtrip_conversion(self): ctx = Context() # Test from_beets conversion - model = mapper.from_beets(beets_album_match, ctx) + model = mapper.to_db(beets_album_match, ctx) assert model.info.data["album_id"] == "abc123" assert model.info.data["album"] == "Test Album" assert model.info.data["artist"] == "Test Artist" @@ -276,7 +276,7 @@ def test_roundtrip_conversion(self): ) # Test to_beets conversion - result = mapper.to_beets(model, ctx) + result = mapper.from_db(model, ctx) assert result.info.album_id == "abc123" assert result.info.album == "Test Album" assert result.info.artist == "Test Artist" @@ -319,7 +319,7 @@ def test_roundtrip_conversion(self): ctx = Context() # Test from_beets - model = mapper.from_beets(original, ctx) + model = mapper.to_db(original, ctx) assert isinstance(model.info, TrackInfo) assert model.info.data["title"] == "Test Track 1" assert model.info.data["artist"] == "Test Artist" @@ -329,7 +329,7 @@ def test_roundtrip_conversion(self): assert len(model.distance.penalties) == 2 # Test to_beets - result = mapper.to_beets(model, ctx) + result = mapper.from_db(model, ctx) assert isinstance(result, BeetsTrackMatch) assert result.info.title == beets_track1.title assert result.info.artist == beets_track1.artist @@ -354,8 +354,8 @@ def test_roundtrip_album_match(self): mapper = MatchMapper() ctx = Context() - model = mapper.from_beets(beets_album_match, ctx) - result = mapper.to_beets(model, ctx) + model = mapper.to_db(beets_album_match, ctx) + result = mapper.from_db(model, ctx) assert isinstance(result, BeetsAlbumMatch) assert result.info.album_id == "abc123" @@ -378,8 +378,8 @@ def test_roundtrip_track_match(self): mapper = MatchMapper() ctx = Context() - model = mapper.from_beets(beets_track_match, ctx) - result = mapper.to_beets(model, ctx) + model = mapper.to_db(beets_track_match, ctx) + result = mapper.from_db(model, ctx) assert isinstance(result, BeetsTrackMatch) assert result.info.title == "Test Track" From 4891ddb9d9106792050b056f4fcf3b503c21b64a Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Thu, 4 Jun 2026 23:15:04 +0200 Subject: [PATCH 08/12] Migrating the to and from live state functions into the mapper layer --- backend/beets_flask/database/mapper/states.py | 143 +++++ backend/beets_flask/database/models/states.py | 194 +------ .../server/routes/db_models/session.py | 6 +- .../unit/test_database/mapper/test_states.py | 504 ++++++++++++++++++ .../tests/unit/test_importer/test_states.py | 5 - 5 files changed, 680 insertions(+), 172 deletions(-) create mode 100644 backend/beets_flask/database/mapper/states.py create mode 100644 backend/tests/unit/test_database/mapper/test_states.py diff --git a/backend/beets_flask/database/mapper/states.py b/backend/beets_flask/database/mapper/states.py new file mode 100644 index 00000000..5860042b --- /dev/null +++ b/backend/beets_flask/database/mapper/states.py @@ -0,0 +1,143 @@ +import pickle + +from beets_flask.database.models.pending import TaskItem +from beets_flask.database.models.states import ( + CandidateStateInDb, + FolderInDb, + SessionStateInDb, + TaskStateInDb, +) +from beets_flask.importer.states import CandidateState, SessionState, TaskState +from beets_flask.importer.types import BeetsImportTask +from beets_flask.logger import log + +from .base import Context, DBMapper +from .match import ItemMapper, MatchMapper + + +class SessionStateMapper(DBMapper[SessionState, SessionStateInDb]): + def __init__(self, want_to_serialize=False) -> None: + """In the case of want_to_serialize we do not load a folder children.""" + self.task_mapper = TaskStateMapper() + self.want_to_serialize = want_to_serialize + + def _from_db(self, model: SessionStateInDb, ctx: Context) -> SessionState: + if self.want_to_serialize: + s_state = SessionState(model.folder.to_live_folder()) + else: + s_state = SessionState(model.folder.path) + + if s_state.folder_hash != model.folder.hash: + log.warning( + f"Folder hash mismatch for {model.folder.path}. " + f"Expected {model.folder.hash} but got {s_state.folder_hash}." + ) + s_state.id = model.id + s_state.created_at = model.created_at + s_state.updated_at = model.updated_at + s_state._task_states = [ + self.task_mapper.from_db(task, ctx) for task in model.tasks + ] + s_state.exc = pickle.loads(model.exc) if model.exc else None + return s_state + + def _to_db(self, obj: SessionState, ctx: Context) -> SessionStateInDb: + return SessionStateInDb( + id=obj.id, + folder=FolderInDb(obj.folder_path, obj.folder_hash), + tasks=[self.task_mapper.to_db(ts, ctx) for ts in obj.task_states], + progress=obj.progress.progress, + exc=obj.exc, + ) + + +class TaskStateMapper(DBMapper[TaskState, TaskStateInDb]): + def __init__(self) -> None: + self.item_mapper = ItemMapper() + self.candidate_mapper = CandidateStateMapper() + + def _from_db(self, model: TaskStateInDb, ctx: Context) -> TaskState: + """Recreate the live TaskState with underlying task from its stored version in the db.""" + + # We just assume it is a normal import task + beets_task = BeetsImportTask( + toppath=model.toppath, + paths=pickle.loads(model.paths), + items=[ + self.item_mapper.from_db(task_item.item, ctx) + for task_item in model.pending_items + ], + ) + beets_task.choice_flag = model.choice_flag + beets_task.cur_artist = model.cur_artist + beets_task.cur_album = model.cur_album + old_paths: list[bytes] | None = ( + pickle.loads(model.old_paths) if model.old_paths else None + ) + # TODO: Update type hints once beets is updated + beets_task.old_paths = old_paths # type: ignore + + obj = TaskState(beets_task) + obj.id = model.id + obj.created_at = model.created_at + obj.updated_at = model.updated_at + obj.candidate_states = [ + self.candidate_mapper.from_db(c, ctx) for c in model.candidates + ] + obj.chosen_candidate_state_id = model.chosen_candidate_id + obj.progress.progress = model.progress + + # Set candidate of beets_task + obj.task.candidates = [c.match for c in obj.candidate_states] + return obj + + def _to_db(self, obj: TaskState, ctx: Context) -> TaskStateInDb: + if hasattr(obj.task, "old_paths"): + old_paths = obj.task.old_paths + else: + old_paths = None + + return TaskStateInDb( + id=obj.id, + toppath=str(obj.toppath).encode("utf-8") if obj.toppath else None, + paths=obj.task.paths, + pending_items=[ + TaskItem(item=self.item_mapper.to_db(i, ctx)) for i in obj.items + ], + candidates=[ + self.candidate_mapper.to_db(c, ctx) for c in obj.candidate_states + ], + chosen_candidate_id=obj.chosen_candidate_state_id, + progress=obj.progress.progress, + choice_flag=obj.task.choice_flag, + cur_artist=obj.task.cur_artist, + cur_album=obj.task.cur_album, + old_paths=old_paths, + ) + + +class CandidateStateMapper(DBMapper[CandidateState, CandidateStateInDb]): + def __init__(self) -> None: + self.match_mapper = MatchMapper() + self.task_mapper = TaskStateMapper() + + def _from_db(self, model: CandidateStateInDb, ctx: Context) -> CandidateState: + obj = CandidateState( + match=self.match_mapper.from_db(model.match, ctx), + task_state=self.task_mapper.from_db(model.task, ctx), + ) + obj.id = model.id + obj.created_at = model.created_at + obj.updated_at = model.updated_at + obj.duplicate_ids = ( + # edge case: "".split() gives [''] + [] if len(model.duplicate_ids) == 0 else model.duplicate_ids.split(";") + ) + return obj + + def _to_db(self, obj: CandidateState, ctx: Context) -> CandidateStateInDb: + return CandidateStateInDb( + id=obj.id, + match=self.match_mapper.to_db(obj.match, ctx), + duplicate_ids=obj.duplicate_ids, + ) diff --git a/backend/beets_flask/database/models/states.py b/backend/beets_flask/database/models/states.py index 675c72ba..bf7afee2 100644 --- a/backend/beets_flask/database/models/states.py +++ b/backend/beets_flask/database/models/states.py @@ -16,7 +16,7 @@ import pickle from pathlib import Path -from beets.importer import Action, ImportTask +from beets.importer import Action from sqlalchemy import ( ForeignKey, UniqueConstraint, @@ -30,20 +30,11 @@ ) from beets_flask.database.mapper.base import Context -from beets_flask.database.mapper.match import ItemMapper, MatchMapper from beets_flask.database.models.base import Base from beets_flask.database.models.match import Match from beets_flask.disk import Archive, Folder from beets_flask.importer.progress import Progress -from beets_flask.importer.states import ( - CandidateState, - SerializedCandidateState, - SerializedSessionState, - SerializedTaskState, - SessionState, - TaskState, -) -from beets_flask.importer.types import BeetsItem +from beets_flask.importer.states import SessionState from beets_flask.logger import log from beets_flask.server.exceptions import SerializedException @@ -235,51 +226,10 @@ def __init__( self.progress = progress self.exc = pickle.dumps(exc) if exc else None - @classmethod - def from_live_state(cls, state: SessionState) -> SessionStateInDb: - """Create the DB representation of a live SessionState..""" - - session = cls( - folder=FolderInDb(state.folder_path, state.folder_hash), - id=state.id, - tasks=[TaskStateInDb.from_live_state(ts) for ts in state.task_states], - progress=state.progress.progress, - exc=state.exc, - ) - - return session - @property def folder_path(self) -> Path: return self.folder.path - def to_live_state(self, new_folder=True) -> SessionState: - """Recreate the live SessionState with underlying task from its stored version in the db. - - HACK: new_folder param is a bit hacky, as if we do not include the children if we - are not recomputing the folder hash. Might lead to some issues down the line. - """ - - if new_folder: - s_state = SessionState(self.folder.path) - else: - s_state = SessionState(self.folder.to_live_folder()) - - if s_state.folder_hash != self.folder.hash: - log.warning( - f"Folder hash mismatch for {self.folder.path}. " - f"Expected {self.folder.hash} but got {s_state.folder_hash}." - ) - s_state.id = self.id - s_state.created_at = self.created_at - s_state.updated_at = self.updated_at - s_state._task_states = [task.to_live_state(s_state) for task in self.tasks] - s_state.exc = pickle.loads(self.exc) if self.exc else None - return s_state - - def to_dict(self) -> SerializedSessionState: - return self.to_live_state(False).serialize() - @classmethod def get_by_hash_and_path( cls, @@ -323,6 +273,31 @@ def exception(self) -> SerializedException | None: """Returns the exception of the session if it failed.""" return pickle.loads(self.exc) if self.exc else None + def to_live_state(self): + """To live state. + + Outlook: We should remove this at some point once we refactor + the live_state logic! + """ + from beets_flask.database.mapper.states import SessionStateMapper + + mapper = SessionStateMapper() + ctx = Context() + return mapper.from_db(self, ctx) + + @classmethod + def from_live_state(cls, live_state: SessionState): + """From live state. + + Outlook: We should remove this at some point once we refactor + the live_state logic! + """ + from beets_flask.database.mapper.states import SessionStateMapper + + mapper = SessionStateMapper() + ctx = Context() + return mapper.to_db(live_state, ctx) + class TaskStateInDb(Base): """Represents an import task. @@ -377,12 +352,6 @@ class TaskStateInDb(Base): progress: Mapped[Progress] - @property - def items(self) -> list[BeetsItem]: - ctx = Context() - mapper = ItemMapper() - return [mapper.from_db(row.item, ctx) for row in self.pending_items] - def __init__( self, id: str | None = None, @@ -410,73 +379,6 @@ def __init__( self.cur_artist = cur_artist self.cur_album = cur_album - @classmethod - def from_live_state(cls, state: TaskState) -> TaskStateInDb: - """Create the DB representation of a live TaskState.""" - if hasattr(state.task, "old_paths"): - old_paths = state.task.old_paths - else: - old_paths = None - - ctx = Context() - mapper = ItemMapper() - - task = cls( - id=state.id, - toppath=str(state.toppath).encode("utf-8") if state.toppath else None, - paths=state.task.paths, - pending_items=[ - TaskItem(item=mapper.to_db(item, ctx)) for item in state.items - ], - candidates=[ - CandidateStateInDb.from_live_state(c, ctx) - for c in state.candidate_states - ], - chosen_candidate_id=state.chosen_candidate_state_id, - progress=state.progress.progress, - choice_flag=state.task.choice_flag, - cur_artist=state.task.cur_artist, - cur_album=state.task.cur_album, - old_paths=old_paths, - ) - return task - - def to_live_state(self, session_state: SessionState | None = None) -> TaskState: - """Recreate the live TaskState with underlying task from its stored version in the db.""" - - # We just assume it is a normal import task - beets_task = ImportTask( - toppath=self.toppath, - paths=pickle.loads(self.paths), - items=self.items, - ) - beets_task.choice_flag = self.choice_flag - beets_task.cur_artist = self.cur_artist - beets_task.cur_album = self.cur_album - old_paths: list[bytes] | None = ( - pickle.loads(self.old_paths) if self.old_paths else None - ) - # TODO: Update type hints once beets is updated - beets_task.old_paths = old_paths # type: ignore - - live_state = TaskState(beets_task) - live_state.id = self.id - live_state.created_at = self.created_at - live_state.updated_at = self.updated_at - live_state.candidate_states = [ - c.to_live_state(live_state) for c in self.candidates - ] - live_state.chosen_candidate_state_id = self.chosen_candidate_id - live_state.progress.progress = self.progress - - # Set candidate of beets_task - live_state.task.candidates = [c.match for c in live_state.candidate_states] - - return live_state - - def to_dict(self) -> SerializedTaskState: - return self.to_live_state().serialize() - class CandidateStateInDb(Base): """Represents a candidate (potential match) for an import task. @@ -497,19 +399,12 @@ class CandidateStateInDb(Base): match: Mapped[Match] = relationship() # Duplicate ids (if any) (beets_id) + # TODO: We should recompute the duplicates on fetching data from the database duplicate_ids: Mapped[str] - # association between tracks online and items on disk, from int to int - # TODO: !! - # We should be able to remove this as there now is an - # direct item linkage - # !! - mapping: Mapped[dict[int, int]] - def __init__( self, match: Match, - mapping: dict[int, int], duplicate_ids: list[str] = [], id: str | None = None, ): @@ -517,39 +412,6 @@ def __init__( self.match = match self.duplicate_ids = ";".join(map(str, duplicate_ids)) - self.mapping = mapping - - @classmethod - def from_live_state(cls, state: CandidateState, ctx: Context) -> CandidateStateInDb: - """Create the DB representation of a live CandidateState.""" - - return cls( - id=state.id, - match=MatchMapper().to_db(state.match, ctx), - duplicate_ids=state.duplicate_ids, - mapping=state._mapping, - ) - - def to_live_state(self, task_state: TaskState | None) -> CandidateState: - """Recreate the live CandidateState with underlying task from its stored version in the db.""" - if task_state is None: - task_state = self.task.to_live_state() - live_state = CandidateState( - MatchMapper().from_db(self.match, ctx), - task_state, - mapping=self.mapping, - ) - live_state.id = self.id - live_state.created_at = self.created_at - live_state.updated_at = self.updated_at - live_state.duplicate_ids = ( - # edge case: "".split() gives [''] - [] if len(self.duplicate_ids) == 0 else self.duplicate_ids.split(";") - ) - return live_state - - def to_dict(self) -> SerializedCandidateState: - return self.to_live_state(self.task.to_live_state()).serialize() __all__ = ["SessionStateInDb", "TaskStateInDb", "CandidateStateInDb"] diff --git a/backend/beets_flask/server/routes/db_models/session.py b/backend/beets_flask/server/routes/db_models/session.py index 428f5b04..d53265ac 100644 --- a/backend/beets_flask/server/routes/db_models/session.py +++ b/backend/beets_flask/server/routes/db_models/session.py @@ -8,6 +8,8 @@ from beets_flask import invoker from beets_flask.database import db_session_factory +from beets_flask.database.mapper.base import Context +from beets_flask.database.mapper.states import SessionStateMapper from beets_flask.database.models.states import ( FolderInDb, SessionStateInDb, @@ -72,7 +74,9 @@ async def get_by_folder(self): status_code=200, ) - return jsonify(item.to_dict()) + mapper = SessionStateMapper(want_to_serialize=True) + live_state = mapper.from_db(item, Context()) + return jsonify(live_state.serialize()) async def enqueue(self): """Start a new session for a given folder hash or enqueue a new job for an existing session. diff --git a/backend/tests/unit/test_database/mapper/test_states.py b/backend/tests/unit/test_database/mapper/test_states.py new file mode 100644 index 00000000..9f6af72d --- /dev/null +++ b/backend/tests/unit/test_database/mapper/test_states.py @@ -0,0 +1,504 @@ +"""Tests for the state mappers: SessionStateMapper, TaskStateMapper, CandidateStateMapper. + +These tests verify bidirectional (roundtrip) conversion between live state objects +and their database model representations, following the same pattern as test_match.py. +""" + +import pickle +from pathlib import Path + +import pytest +from beets import importer +from beets.autotag.distance import Distance as BeetsDistance +from beets.autotag.hooks import AlbumMatch as BeetsAlbumMatch +from beets.autotag.hooks import TrackInfo as BeetsTrackInfo +from beets.autotag.hooks import TrackMatch as BeetsTrackMatch + +from beets_flask.database.mapper.base import Context +from beets_flask.database.mapper.match import ItemMapper, MatchMapper +from beets_flask.database.mapper.states import ( + CandidateStateMapper, + SessionStateMapper, + TaskStateMapper, +) +from beets_flask.database.models.match import AlbumMatch +from beets_flask.database.models.states import ( + CandidateStateInDb, + SessionStateInDb, + TaskStateInDb, +) +from beets_flask.importer.states import CandidateState, SessionState, TaskState +from beets_flask.importer.types import BeetsItem +from tests.conftest import beets_lib_item +from tests.unit.test_database.mapper.test_match import create_beets_album_match + +# --------------------------------------------------------------------------- +# Helper: build mapper instances without triggering infinite recursion +# --------------------------------------------------------------------------- +# The mappers have a circular __init__ dependency: +# TaskStateMapper -> CandidateStateMapper -> TaskStateMapper -> ... +# We bypass __init__ via object.__new__ and wire up the sub-mappers manually, +# sharing instances to break the cycle. + + +def _build_candidate_mapper() -> CandidateStateMapper: + """Build a CandidateStateMapper with a shared TaskStateMapper to avoid recursion.""" + cm = object.__new__(CandidateStateMapper) + cm.match_mapper = MatchMapper() + cm.task_mapper = _build_task_mapper(candidate_mapper=cm) + return cm + + +def _build_task_mapper( + candidate_mapper: CandidateStateMapper | None = None, +) -> TaskStateMapper: + """Build a TaskStateMapper, optionally reusing an existing CandidateStateMapper.""" + tm = object.__new__(TaskStateMapper) + tm.item_mapper = ItemMapper() + if candidate_mapper is not None: + tm.candidate_mapper = candidate_mapper + else: + tm.candidate_mapper = _build_candidate_mapper() + return tm + + +def _build_session_mapper(want_to_serialize: bool = False) -> SessionStateMapper: + """Build a SessionStateMapper that owns a fresh TaskStateMapper chain.""" + sm = object.__new__(SessionStateMapper) + sm.task_mapper = _build_task_mapper() + sm.want_to_serialize = want_to_serialize + return sm + + +# --------------------------------------------------------------------------- +# Helper: create a minimal BeetsImportTask for testing +# --------------------------------------------------------------------------- + + +def _make_import_task( + paths: list[bytes] | None = None, + toppath: bytes | None = None, + items: list[BeetsItem] | None = None, +) -> importer.ImportTask: + """Create a minimal BeetsImportTask for mapper tests.""" + if paths is None: + paths = [b"/fake/path/file1.mp3"] + if toppath is None: + toppath = b"/fake/path" + if items is None: + items = [beets_lib_item(title="test-item", path=str(paths[0], "utf-8"))] + + task = importer.ImportTask(paths=paths, toppath=toppath, items=items) + return task + + +# ============================================================================ +# Tests +# ============================================================================ + + +class TestSessionStateMapper: + """Tests for SessionState <-> SessionStateInDb roundtrip conversion.""" + + def test_roundtrip_empty_session(self, tmp_path: Path): + """Roundtrip a SessionState with no tasks.""" + mapper = _build_session_mapper() + ctx = Context() + + # Create a live SessionState pointing at a real temp directory + original = SessionState(tmp_path) + + # Convert to DB model + model: SessionStateInDb = mapper.to_db(original, ctx) + + # Assert model structure + assert isinstance(model, SessionStateInDb) + assert model.folder.full_path == str(tmp_path.resolve()) + assert model.folder.hash == original.folder_hash + assert len(model.tasks) == 0 + assert model.progress == original.progress.progress + assert model.exc is None + + # Convert back to live object + result: SessionState = mapper.from_db(model, ctx) + + # Assert roundtrip fidelity + assert result.id == original.id + assert result.folder_path == original.folder_path + assert result.folder_hash == original.folder_hash + assert len(result.task_states) == 0 + assert result.progress == original.progress + + def test_roundtrip_with_exception(self, tmp_path: Path): + """Roundtrip a SessionState that carries a serialized exception.""" + from beets_flask.server.exceptions import SerializedException + + mapper = _build_session_mapper() + ctx = Context() + + original = SessionState(tmp_path) + original.exc = SerializedException( + type="ValueError", + message="something went wrong", + trace="fake traceback", + ) + + # Convert to DB model + model: SessionStateInDb = mapper.to_db(original, ctx) + assert model.exc is not None + + # Convert back + result: SessionState = mapper.from_db(model, ctx) + assert result.exc is not None + assert result.exc["type"] == "ValueError" + assert result.exc["message"] == "something went wrong" + + def test_roundtrip_with_want_to_serialize(self, tmp_path: Path): + """When want_to_serialize=True, from_db uses folder.to_live_folder().""" + mapper = _build_session_mapper(want_to_serialize=True) + ctx = Context() + + original = SessionState(tmp_path) + + model: SessionStateInDb = mapper.to_db(original, ctx) + result: SessionState = mapper.from_db(model, ctx) + + # With want_to_serialize=True the folder is reconstructed via + # folder.to_live_folder() which creates a Folder with children=[]. + # The key assertions: path and hash are preserved. + assert result.folder_path == original.folder_path + assert result.folder_hash == original.folder_hash + + +class TestTaskStateMapper: + """Tests for TaskState <-> TaskStateInDb roundtrip conversion.""" + + def test_roundtrip_minimal_task(self): + """Roundtrip a TaskState with no candidates.""" + mapper = _build_task_mapper() + ctx = Context() + + beets_task = _make_import_task() + original = TaskState(beets_task) + + # Convert to DB model + model: TaskStateInDb = mapper.to_db(original, ctx) + + # Assert model structure + assert isinstance(model, TaskStateInDb) + assert model.toppath == b"/fake/path" + assert pickle.loads(model.paths) == [b"/fake/path/file1.mp3"] + assert len(model.pending_items) == 1 + assert len(model.candidates) == 0 + assert model.progress == original.progress.progress + assert model.choice_flag == beets_task.choice_flag + assert model.cur_artist == beets_task.cur_artist + assert model.cur_album == beets_task.cur_album + assert model.old_paths is None + + # Convert back to live object + result: TaskState = mapper.from_db(model, ctx) + + # Assert roundtrip fidelity + assert result.id == original.id + assert result.toppath == original.toppath + assert result.paths == original.paths + assert len(result.items) == 1 + assert len(result.candidate_states) == 0 + assert result.progress == original.progress + assert result.task.choice_flag == beets_task.choice_flag + + def test_roundtrip_with_old_paths(self): + """Roundtrip a TaskState whose underlying task has old_paths set.""" + mapper = _build_task_mapper() + ctx = Context() + + beets_task = _make_import_task() + # Simulate moved files: old_paths differ from paths + beets_task.old_paths = [b"/old/path/file1.mp3"] + + original = TaskState(beets_task) + + # Convert to DB model + model: TaskStateInDb = mapper.to_db(original, ctx) + assert model.old_paths is not None + assert pickle.loads(model.old_paths) == [b"/old/path/file1.mp3"] + + # Convert back + result: TaskState = mapper.from_db(model, ctx) + assert result.task.old_paths is not None + assert result.task.old_paths == [b"/old/path/file1.mp3"] + + def test_task_items_roundtrip_preserves_fixed_and_flex_values(self): + """Verify that BeetsItem fixed/flex attrs survive the roundtrip.""" + mapper = _build_task_mapper() + ctx = Context() + + # Create an item with specific flex attributes + item = beets_lib_item(title="roundtrip-title", artist="roundtrip-artist") + item.genre = "roundtrip-genre" # flex attr via __setattr__ + + beets_task = _make_import_task(items=[item]) + original = TaskState(beets_task) + + model: TaskStateInDb = mapper.to_db(original, ctx) + result: TaskState = mapper.from_db(model, ctx) + + assert len(result.items) == 1 + result_item = result.items[0] + assert result_item.title == "roundtrip-title" + assert result_item.artist == "roundtrip-artist" + assert result_item.genre == "roundtrip-genre" + + def test_roundtrip_with_choice_flag_and_metadata(self): + """Roundtrip a task that has choice_flag, cur_artist, cur_album set.""" + from beets.importer import Action + + mapper = _build_task_mapper() + ctx = Context() + + beets_task = _make_import_task() + beets_task.choice_flag = Action.ASIS + beets_task.cur_artist = "Test Artist" + beets_task.cur_album = "Test Album" + + original = TaskState(beets_task) + + model: TaskStateInDb = mapper.to_db(original, ctx) + assert model.choice_flag == Action.ASIS + assert model.cur_artist == "Test Artist" + assert model.cur_album == "Test Album" + + result: TaskState = mapper.from_db(model, ctx) + assert result.task.choice_flag == Action.ASIS + assert result.task.cur_artist == "Test Artist" + assert result.task.cur_album == "Test Album" + + +class TestCandidateStateMapper: + """Tests for CandidateState <-> CandidateStateInDb roundtrip conversion.""" + + @pytest.fixture + def candidate_mapper(self) -> CandidateStateMapper: + """Build a CandidateStateMapper wired to a shared TaskStateMapper.""" + return _build_candidate_mapper() + + @pytest.fixture + def task_mapper(self) -> TaskStateMapper: + """Build a standalone TaskStateMapper for use in tests.""" + return _build_task_mapper() + + def test_roundtrip_album_candidate_no_duplicates( + self, + candidate_mapper: CandidateStateMapper, + task_mapper: TaskStateMapper, + ): + """Roundtrip an album-match CandidateState with no duplicates.""" + ctx = Context() + + # ---- build live objects ---- + item = beets_lib_item(title="disk-item") + beets_task = _make_import_task(items=[item]) + task_state = TaskState(beets_task) + + beets_track = BeetsTrackInfo(title="candidate-track") + album_match = create_beets_album_match( + album_id="alb-1", + album_name="Candidate Album", + album_artist="Candidate Artist", + tracks=[beets_track], + distance_penalties={"artist": 0.1}, + mapping={item: beets_track}, + ) + + original = CandidateState(match=album_match, task_state=task_state) + + # ---- to_db ---- + model: CandidateStateInDb = candidate_mapper.to_db(original, ctx) + assert isinstance(model, CandidateStateInDb) + assert model.duplicate_ids == "" + assert isinstance(model.match, AlbumMatch) + assert model.match.info.data["album"] == "Candidate Album" + assert model.match.info.data["artist"] == "Candidate Artist" + assert len(model.match.info.tracks) == 1 + assert model.match.info.tracks[0].data["title"] == "candidate-track" + + # ---- Wire up the task relationship on the model ---- + # In the real app SQLAlchemy sets this FK; in unit tests we do it + # manually so that _from_db can roundtrip. + task_model: TaskStateInDb = task_mapper.to_db(task_state, ctx) + model.task = task_model + + # ---- from_db (use fresh context to avoid cache interference) ---- + ctx2 = Context() + result: CandidateState = candidate_mapper.from_db(model, ctx2) + + assert result.id == original.id + assert isinstance(result.match, BeetsAlbumMatch) + assert result.match.info.album_id == "alb-1" + assert result.match.info.album == "Candidate Album" + assert result.match.info.artist == "Candidate Artist" + assert len(result.match.info.tracks) == 1 + assert result.match.info.tracks[0].title == "candidate-track" + assert result.duplicate_ids == [] + + def test_roundtrip_with_duplicate_ids( + self, + candidate_mapper: CandidateStateMapper, + task_mapper: TaskStateMapper, + ): + """Roundtrip a CandidateState that has duplicate IDs set.""" + ctx = Context() + + item = beets_lib_item(title="dup-item") + beets_task = _make_import_task(items=[item]) + task_state = TaskState(beets_task) + + beets_track = BeetsTrackInfo(title="dup-track") + album_match = create_beets_album_match( + tracks=[beets_track], + mapping={item: beets_track}, + ) + + original = CandidateState(match=album_match, task_state=task_state) + original.duplicate_ids = ["dup-1", "dup-2", "dup-3"] + + model: CandidateStateInDb = candidate_mapper.to_db(original, ctx) + assert model.duplicate_ids == "dup-1;dup-2;dup-3" + + task_model: TaskStateInDb = task_mapper.to_db(task_state, ctx) + model.task = task_model + + ctx2 = Context() + result: CandidateState = candidate_mapper.from_db(model, ctx2) + assert result.duplicate_ids == ["dup-1", "dup-2", "dup-3"] + + def test_roundtrip_track_match_candidate( + self, + candidate_mapper: CandidateStateMapper, + task_mapper: TaskStateMapper, + ): + """Roundtrip a CandidateState wrapping a TrackMatch.""" + ctx = Context() + + beets_task = _make_import_task() + task_state = TaskState(beets_task) + + # Build a TrackMatch + track_distance = BeetsDistance() + track_distance.add("artist", 0.1) + beets_track = BeetsTrackInfo( + title="Track Candidate", + artist="Track Artist", + length=200.0, + index=1, + ) + beets_item = beets_lib_item(title="Matched Item") + track_match = BeetsTrackMatch( + distance=track_distance, + info=beets_track, + item=beets_item, + ) + + original = CandidateState(match=track_match, task_state=task_state) + + model: CandidateStateInDb = candidate_mapper.to_db(original, ctx) + from beets_flask.database.models.match import TrackMatch + + assert isinstance(model.match, TrackMatch) + assert model.match.info.data["title"] == "Track Candidate" + assert model.match.info.data["artist"] == "Track Artist" + + task_model: TaskStateInDb = task_mapper.to_db(task_state, ctx) + model.task = task_model + + ctx2 = Context() + result: CandidateState = candidate_mapper.from_db(model, ctx2) + assert isinstance(result.match, BeetsTrackMatch) + assert result.match.info.title == "Track Candidate" + assert result.match.info.artist == "Track Artist" + assert result.match.distance.raw_distance == track_distance.raw_distance + assert result.match.item.title == "Matched Item" + + def test_empty_duplicate_ids_edge_case( + self, + candidate_mapper: CandidateStateMapper, + task_mapper: TaskStateMapper, + ): + """Edge case: empty string in duplicate_ids should become [] not [''].""" + ctx = Context() + + item = beets_lib_item(title="edge-item") + beets_task = _make_import_task(items=[item]) + task_state = TaskState(beets_task) + + beets_track = BeetsTrackInfo(title="edge-track") + album_match = create_beets_album_match( + tracks=[beets_track], mapping={item: beets_track} + ) + + original = CandidateState(match=album_match, task_state=task_state) + original.duplicate_ids = [] + + model: CandidateStateInDb = candidate_mapper.to_db(original, ctx) + assert model.duplicate_ids == "" + + task_model: TaskStateInDb = task_mapper.to_db(task_state, ctx) + model.task = task_model + + ctx2 = Context() + result: CandidateState = candidate_mapper.from_db(model, ctx2) + assert result.duplicate_ids == [] + + +class TestTaskStateWithCandidatesIntegration: + """Integration-style tests: roundtrip a TaskState that contains candidates.""" + + def test_roundtrip_task_with_candidates(self): + """Full roundtrip: TaskState with candidates -> TaskStateInDb -> TaskState.""" + # Build shared mappers (avoiding recursion) + task_mapper = _build_task_mapper() + ctx = Context() + + # ---- build live TaskState with candidates ---- + item = beets_lib_item(title="integration-item") + beets_task = _make_import_task(items=[item]) + beets_task.choice_flag = importer.Action.ASIS + task_state = TaskState(beets_task) + + beets_track = BeetsTrackInfo(title="integration-track") + album_match = create_beets_album_match( + album_id="int-1", + tracks=[beets_track], + mapping={item: beets_track}, + distance_penalties={"source": 0.5}, + ) + # Simulate beets setting candidates on the task + beets_task.candidates = [album_match] + task_state.candidate_states = [CandidateState(album_match, task_state)] + + # ---- to_db ---- + model: TaskStateInDb = task_mapper.to_db(task_state, ctx) + assert len(model.candidates) == 1 + assert model.candidates[0].match.info.data["album_id"] == "int-1" + + # Wire up reverse relationships on the candidate models + for c_model in model.candidates: + c_model.task = model + + # ---- from_db (fresh context) ---- + ctx2 = Context() + result: TaskState = task_mapper.from_db(model, ctx2) + + assert len(result.candidate_states) == 1 + cs = result.candidate_states[0] + assert isinstance(cs.match, BeetsAlbumMatch) + assert cs.match.info.album_id == "int-1" + assert cs.match.info.album == "Test Album" # default from factory + # The match info tracks are roundtripped + assert len(cs.match.info.tracks) == 1 + assert cs.match.info.tracks[0].title == "integration-track" + # The task-level attributes are preserved + assert result.task.choice_flag == importer.Action.ASIS + assert len(result.items) == 1 + assert result.items[0].title == "integration-item" diff --git a/backend/tests/unit/test_importer/test_states.py b/backend/tests/unit/test_importer/test_states.py index f28f2282..b77094f9 100644 --- a/backend/tests/unit/test_importer/test_states.py +++ b/backend/tests/unit/test_importer/test_states.py @@ -130,11 +130,6 @@ def test_properties(self): assert candidate.url == task.candidates[0].info.data_url assert candidate.url == "url" - # _mapping is set statically in the user_query stage, not via fixture. - # but mapping has a fallback that uses candidate.match.mapping - assert candidate._mapping == candidate.current_mapping - assert candidate.mapping == {0: 0} - def test_asis_candidate(self): # Test asis candidate (last in list) asis_candidate = self.task_state.asis_candidate From 962676497ffcea228b5fe2afbb613cf58d835f60 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Fri, 5 Jun 2026 17:08:23 +0200 Subject: [PATCH 09/12] Removed _mapping in candidate state database model --- ...ba78_added_item_reference_to_trackmatch.py | 11 ++-- backend/beets_flask/importer/states.py | 60 +++++-------------- backend/beets_flask/invoker/enqueue.py | 1 + backend/tests/integration/test_flows.py | 14 +++-- 4 files changed, 31 insertions(+), 55 deletions(-) diff --git a/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py b/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py index 4d5df24a..12e08591 100644 --- a/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py +++ b/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py @@ -22,15 +22,14 @@ def upgrade() -> None: """Upgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### op.add_column("matches_track", sa.Column("item_id", sa.String(), nullable=False)) - op.create_foreign_key(None, "matches_track", "items", ["item_id"], ["id"]) - # ### end Alembic commands ### + op.create_foreign_key( + "fk_matches_track_items", "matches_track", "items", ["item_id"], ["id"] + ) + op.drop_column("candidate", "mapping") def downgrade() -> None: """Downgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint(None, "matches_track", type_="foreignkey") + op.drop_constraint("fk_matches_track_items", "matches_track", type_="foreignkey") op.drop_column("matches_track", "item_id") - # ### end Alembic commands ### diff --git a/backend/beets_flask/importer/states.py b/backend/beets_flask/importer/states.py index 118ee693..f93e448c 100644 --- a/backend/beets_flask/importer/states.py +++ b/backend/beets_flask/importer/states.py @@ -441,25 +441,16 @@ class CandidateState(BaseState): # Reference upwards task_state: TaskState - _mapping: dict[int, int] # index mapping from items to tracks - def __init__( self, match: BeetsAlbumMatch | BeetsTrackMatch, task_state: TaskState, - mapping: dict[int, int] | None = None, ) -> None: super().__init__() self.match = match self.duplicate_ids = [] # checked and set by session self.task_state = task_state - # current_mapping is dynamic and looks at the match to generate integer / index mapping - # this can cause problems, when loading a previously imported candidate from the db - # as, in this case, the mapping is wrong and _index_mapping will fail. - # we take care of this by manually overwriting when constructing from the db. - self._mapping = mapping or self.current_mapping - def __repr__(self) -> str: return ( f"CandidateState:\n" @@ -470,7 +461,6 @@ def __repr__(self) -> str: + f" * penalties={self.penalties}\n" + f" * {len(self.items)=}\n" + f" * {len(self.tracks)=}\n" - + f" * mapping={self.mapping}\n" ) @property @@ -655,22 +645,6 @@ def is_asis(self) -> bool: """Returns True if this is an "as is" candidate.""" return self.id.startswith("asis-") - @property - def mapping(self) -> dict[int, int]: - return self._mapping - - @property - def current_mapping(self) -> dict[int, int]: - """Get the current mapping from items to tracks, calculated from the match.""" - if isinstance(self.match, BeetsAlbumMatch): - return _index_mapping( - self.match.mapping, - self.items, - self.tracks, - ) - - raise ValueError("Current mapping only available for album matches.") - # ------------------------------------ utility ----------------------------------- # def identify_duplicates(self, lib: BeetsLibrary | None = None) -> list[BeetsAlbum]: @@ -729,29 +703,26 @@ def serialize(self) -> SerializedCandidateState: # we lift the match.info to reduce nesting in the frontend. info: TrackInfo | AlbumInfo tracks: list[TrackInfo] - mapping: dict[int, int] = {} + mapping: dict[int, int] - if isinstance(self.match.info, BeetsTrackInfo): + if isinstance(self.match, BeetsTrackMatch): # This hardly ever happens, we might support this more in the future info = TrackInfo.from_beets(self.match.info) tracks = [TrackInfo.from_beets(self.match.info)] - elif isinstance(self.match.info, BeetsAlbumInfo): + mapping = {} + elif isinstance(self.match, BeetsAlbumMatch): info = AlbumInfo.from_beets(self.match.info) - # Map beets types to our types, allows serialization magic tracks = [TrackInfo.from_beets(track) for track in self.match.info.tracks] - - # mapping = _index_mapping( - # self.match.mapping, # type: ignore - # self.items, - # self.tracks, - # ) - mapping = self.mapping - + mapping = _index_mapping( + self.match.mapping, + self.items, + self.tracks, + ) else: raise ValueError(f"Unknown type of matchinfo {type(self.match.info)}") - res = SerializedCandidateState( + return SerializedCandidateState( **super().serialize(), penalties=self.penalties, duplicate_ids=self.duplicate_ids, @@ -762,8 +733,6 @@ def serialize(self) -> SerializedCandidateState: mapping=mapping, ) - return res - def _index_mapping( mapping: dict[BeetsItem, BeetsTrackInfo], @@ -772,8 +741,8 @@ def _index_mapping( ) -> dict[int, int]: """Helper to create an index mapping from items to tracks. - the mapping of a beets albummatch uses objects, but we don not want - to send them over redundantly. convert to an index mapping, + The mapping of a beets albummatch uses objects, but we dont want + to send them over redundantly. Convert to an index mapping, where first index is in self.items, and second is in self.match.info.tracks This is used to serialize the mapping of a candidate state. @@ -882,7 +851,10 @@ class SerializedCandidateState(SerializedBaseState): info: TrackInfo | ItemInfo | AlbumInfo - # Mapping from items to tracks index based + # We need a way to reconstruct the AlbumMatch.mapping in the frontend + # Without sending duplicate objects. we opted for index based mapping + # for the transfer layer + # TODO: Might make sense to use ids here and adjust the frontend mapping: dict[int, int] tracks: list[TrackInfo] diff --git a/backend/beets_flask/invoker/enqueue.py b/backend/beets_flask/invoker/enqueue.py index cda010b8..1a65966d 100644 --- a/backend/beets_flask/invoker/enqueue.py +++ b/backend/beets_flask/invoker/enqueue.py @@ -458,6 +458,7 @@ async def run_preview( ) max_rev = db_session.execute(stmt).scalar_one_or_none() new_rev = 0 if max_rev is None else max_rev + 1 + s_state_indb = SessionStateInDb.from_live_state(p_session.state) s_state_indb.folder_revision = new_rev diff --git a/backend/tests/integration/test_flows.py b/backend/tests/integration/test_flows.py index df3a9f33..484aa3f4 100644 --- a/backend/tests/integration/test_flows.py +++ b/backend/tests/integration/test_flows.py @@ -147,13 +147,11 @@ async def test_preview( t_state_live = s_state_live.task_states[0] assert t_state_live.progress == Progress.PREVIEW_COMPLETED - for c in t_state_live.candidate_states: - assert len(c.duplicate_ids) == 0, ( + for cand in t_state_live.candidate_states: + assert len(cand.duplicate_ids) == 0, ( "Should not have duplicates in empty library" ) - assert c._mapping is not None, "Candidate should have a mapping" - class TestPreviewMultipleTasks( SendStatusMockMixin, IsolatedDBMixin, IsolatedBeetsLibraryMixin @@ -260,7 +258,9 @@ def check_mapping_consistency(self, db_session: Session): for s in s_states_indb: for t in s.to_live_state().task_states: for c in t.candidate_states: - assert c.mapping in [{0: x} for x in range(0, c.num_tracks)] + assert c.serialize()["mapping"] in [ + {0: x} for x in range(0, c.num_tracks) + ] return True @@ -512,6 +512,10 @@ async def test_reimport_fails(self, db_session: Session, path: Path): "Database should contain the one preview session state from the previous test" ) + stmt = select(SessionStateInDb) + s_state_indb = db_session.execute(stmt).scalar() + breakpoint() + self.statuses = [] exc = await run_import_candidate( From 1edb3e4ffb5cfca63301b18ff4eeb275437d3c63 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 8 Jun 2026 20:44:31 +0200 Subject: [PATCH 10/12] Cyclic referencing now working. --- backend/beets_flask/database/mapper/base.py | 13 ++-- backend/beets_flask/database/mapper/states.py | 10 ++- .../unit/test_database/mapper/test_states.py | 61 ++++--------------- 3 files changed, 27 insertions(+), 57 deletions(-) diff --git a/backend/beets_flask/database/mapper/base.py b/backend/beets_flask/database/mapper/base.py index 423c34ac..f65e0624 100644 --- a/backend/beets_flask/database/mapper/base.py +++ b/backend/beets_flask/database/mapper/base.py @@ -46,21 +46,22 @@ class DBMapper(Protocol[B, M]): def to_db(self, obj: B, ctx: Context) -> M: """Convert a Beets object into a model instance with caching.""" key = id(obj) - if key in ctx.from_cache: - return ctx.from_cache[key] + if key in ctx.to_cache: + return ctx.to_cache[key] model = self._to_db(obj, ctx) - ctx.from_cache[key] = model + ctx.to_cache[key] = model return model def from_db(self, model: M, ctx: Context) -> B: """Convert a model instance back into a Beets object with caching.""" key = id(model) - if key in ctx.to_cache: - return ctx.to_cache[key] + if key in ctx.from_cache: + return ctx.from_cache[key] + # Backward-compatible single-phase path obj = self._from_db(model, ctx) - ctx.to_cache[key] = obj + ctx.from_cache[key] = obj return obj def _to_db(self, obj: B, ctx: Context) -> M: diff --git a/backend/beets_flask/database/mapper/states.py b/backend/beets_flask/database/mapper/states.py index 5860042b..066e5ded 100644 --- a/backend/beets_flask/database/mapper/states.py +++ b/backend/beets_flask/database/mapper/states.py @@ -1,4 +1,5 @@ import pickle +from functools import cached_property from beets_flask.database.models.pending import TaskItem from beets_flask.database.models.states import ( @@ -78,6 +79,10 @@ def _from_db(self, model: TaskStateInDb, ctx: Context) -> TaskState: beets_task.old_paths = old_paths # type: ignore obj = TaskState(beets_task) + # Slightly hacky: we add the task to the cache early to allow + # the candidate mapper to find the reference before a return here + ctx.from_cache[id(model)] = obj + obj.id = model.id obj.created_at = model.created_at obj.updated_at = model.updated_at @@ -119,7 +124,10 @@ def _to_db(self, obj: TaskState, ctx: Context) -> TaskStateInDb: class CandidateStateMapper(DBMapper[CandidateState, CandidateStateInDb]): def __init__(self) -> None: self.match_mapper = MatchMapper() - self.task_mapper = TaskStateMapper() + + @cached_property + def task_mapper(self): + return TaskStateMapper() def _from_db(self, model: CandidateStateInDb, ctx: Context) -> CandidateState: obj = CandidateState( diff --git a/backend/tests/unit/test_database/mapper/test_states.py b/backend/tests/unit/test_database/mapper/test_states.py index 9f6af72d..53f71a38 100644 --- a/backend/tests/unit/test_database/mapper/test_states.py +++ b/backend/tests/unit/test_database/mapper/test_states.py @@ -15,7 +15,6 @@ from beets.autotag.hooks import TrackMatch as BeetsTrackMatch from beets_flask.database.mapper.base import Context -from beets_flask.database.mapper.match import ItemMapper, MatchMapper from beets_flask.database.mapper.states import ( CandidateStateMapper, SessionStateMapper, @@ -32,44 +31,6 @@ from tests.conftest import beets_lib_item from tests.unit.test_database.mapper.test_match import create_beets_album_match -# --------------------------------------------------------------------------- -# Helper: build mapper instances without triggering infinite recursion -# --------------------------------------------------------------------------- -# The mappers have a circular __init__ dependency: -# TaskStateMapper -> CandidateStateMapper -> TaskStateMapper -> ... -# We bypass __init__ via object.__new__ and wire up the sub-mappers manually, -# sharing instances to break the cycle. - - -def _build_candidate_mapper() -> CandidateStateMapper: - """Build a CandidateStateMapper with a shared TaskStateMapper to avoid recursion.""" - cm = object.__new__(CandidateStateMapper) - cm.match_mapper = MatchMapper() - cm.task_mapper = _build_task_mapper(candidate_mapper=cm) - return cm - - -def _build_task_mapper( - candidate_mapper: CandidateStateMapper | None = None, -) -> TaskStateMapper: - """Build a TaskStateMapper, optionally reusing an existing CandidateStateMapper.""" - tm = object.__new__(TaskStateMapper) - tm.item_mapper = ItemMapper() - if candidate_mapper is not None: - tm.candidate_mapper = candidate_mapper - else: - tm.candidate_mapper = _build_candidate_mapper() - return tm - - -def _build_session_mapper(want_to_serialize: bool = False) -> SessionStateMapper: - """Build a SessionStateMapper that owns a fresh TaskStateMapper chain.""" - sm = object.__new__(SessionStateMapper) - sm.task_mapper = _build_task_mapper() - sm.want_to_serialize = want_to_serialize - return sm - - # --------------------------------------------------------------------------- # Helper: create a minimal BeetsImportTask for testing # --------------------------------------------------------------------------- @@ -102,7 +63,7 @@ class TestSessionStateMapper: def test_roundtrip_empty_session(self, tmp_path: Path): """Roundtrip a SessionState with no tasks.""" - mapper = _build_session_mapper() + mapper = SessionStateMapper() ctx = Context() # Create a live SessionState pointing at a real temp directory @@ -133,7 +94,7 @@ def test_roundtrip_with_exception(self, tmp_path: Path): """Roundtrip a SessionState that carries a serialized exception.""" from beets_flask.server.exceptions import SerializedException - mapper = _build_session_mapper() + mapper = SessionStateMapper() ctx = Context() original = SessionState(tmp_path) @@ -155,7 +116,7 @@ def test_roundtrip_with_exception(self, tmp_path: Path): def test_roundtrip_with_want_to_serialize(self, tmp_path: Path): """When want_to_serialize=True, from_db uses folder.to_live_folder().""" - mapper = _build_session_mapper(want_to_serialize=True) + mapper = SessionStateMapper(want_to_serialize=True) ctx = Context() original = SessionState(tmp_path) @@ -175,7 +136,7 @@ class TestTaskStateMapper: def test_roundtrip_minimal_task(self): """Roundtrip a TaskState with no candidates.""" - mapper = _build_task_mapper() + mapper = TaskStateMapper() ctx = Context() beets_task = _make_import_task() @@ -210,7 +171,7 @@ def test_roundtrip_minimal_task(self): def test_roundtrip_with_old_paths(self): """Roundtrip a TaskState whose underlying task has old_paths set.""" - mapper = _build_task_mapper() + mapper = TaskStateMapper() ctx = Context() beets_task = _make_import_task() @@ -231,7 +192,7 @@ def test_roundtrip_with_old_paths(self): def test_task_items_roundtrip_preserves_fixed_and_flex_values(self): """Verify that BeetsItem fixed/flex attrs survive the roundtrip.""" - mapper = _build_task_mapper() + mapper = TaskStateMapper() ctx = Context() # Create an item with specific flex attributes @@ -254,7 +215,7 @@ def test_roundtrip_with_choice_flag_and_metadata(self): """Roundtrip a task that has choice_flag, cur_artist, cur_album set.""" from beets.importer import Action - mapper = _build_task_mapper() + mapper = TaskStateMapper() ctx = Context() beets_task = _make_import_task() @@ -281,12 +242,12 @@ class TestCandidateStateMapper: @pytest.fixture def candidate_mapper(self) -> CandidateStateMapper: """Build a CandidateStateMapper wired to a shared TaskStateMapper.""" - return _build_candidate_mapper() + return CandidateStateMapper() @pytest.fixture def task_mapper(self) -> TaskStateMapper: """Build a standalone TaskStateMapper for use in tests.""" - return _build_task_mapper() + return TaskStateMapper() def test_roundtrip_album_candidate_no_duplicates( self, @@ -457,7 +418,7 @@ class TestTaskStateWithCandidatesIntegration: def test_roundtrip_task_with_candidates(self): """Full roundtrip: TaskState with candidates -> TaskStateInDb -> TaskState.""" # Build shared mappers (avoiding recursion) - task_mapper = _build_task_mapper() + task_mapper = TaskStateMapper() ctx = Context() # ---- build live TaskState with candidates ---- @@ -471,7 +432,7 @@ def test_roundtrip_task_with_candidates(self): album_id="int-1", tracks=[beets_track], mapping={item: beets_track}, - distance_penalties={"source": 0.5}, + distance_penalties={"data_source": 0.5}, ) # Simulate beets setting candidates on the task beets_task.candidates = [album_match] From 92120605f083af15cb3b0afac1e13e0084674fc8 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 8 Jun 2026 22:37:23 +0200 Subject: [PATCH 11/12] Fixed beets bug with items no being references but new copies for some reason. --- backend/beets_flask/database/mapper/states.py | 49 +++++++++++++++++-- backend/beets_flask/database/models/states.py | 2 + backend/tests/integration/test_flows.py | 4 -- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/backend/beets_flask/database/mapper/states.py b/backend/beets_flask/database/mapper/states.py index 066e5ded..fe7f7b69 100644 --- a/backend/beets_flask/database/mapper/states.py +++ b/backend/beets_flask/database/mapper/states.py @@ -9,7 +9,7 @@ TaskStateInDb, ) from beets_flask.importer.states import CandidateState, SessionState, TaskState -from beets_flask.importer.types import BeetsImportTask +from beets_flask.importer.types import BeetsAlbumMatch, BeetsImportTask from beets_flask.logger import log from .base import Context, DBMapper @@ -97,21 +97,53 @@ def _from_db(self, model: TaskStateInDb, ctx: Context) -> TaskState: return obj def _to_db(self, obj: TaskState, ctx: Context) -> TaskStateInDb: + # Ensure task.items and all candidate mapping keys share identity. + # Beets mutates only match.items (via imported_items()) during import, + # and DB roundtrips produce divergent Item objects. Collapse all + # references here so to_db creates a single Item DB row per logical item. + for idx, task_item in enumerate(obj.task.items): + for cs in obj.candidate_states: + if not isinstance(cs.match, BeetsAlbumMatch): + continue + for match_item in cs.match.mapping.keys(): + if ( + match_item.track == task_item.track + and match_item.title == task_item.title + ): + obj.task.items[idx] = match_item + break + else: + continue + break + + # Also replace mapping dict keys in ALL candidates so every + # candidate shares the same Item objects as task.items. + for cs in obj.candidate_states: + if not isinstance(cs.match, BeetsAlbumMatch): + continue + new_map = {} + for mi, track in cs.match.mapping.items(): + for ti in obj.task.items: + if mi.track == ti.track and mi.title == ti.title: + new_map[ti] = track + break + else: + new_map[mi] = track + cs.match.mapping = new_map + if hasattr(obj.task, "old_paths"): old_paths = obj.task.old_paths else: old_paths = None - return TaskStateInDb( + model = TaskStateInDb( id=obj.id, toppath=str(obj.toppath).encode("utf-8") if obj.toppath else None, paths=obj.task.paths, pending_items=[ TaskItem(item=self.item_mapper.to_db(i, ctx)) for i in obj.items ], - candidates=[ - self.candidate_mapper.to_db(c, ctx) for c in obj.candidate_states - ], + candidates=[], chosen_candidate_id=obj.chosen_candidate_state_id, progress=obj.progress.progress, choice_flag=obj.task.choice_flag, @@ -119,6 +151,12 @@ def _to_db(self, obj: TaskState, ctx: Context) -> TaskStateInDb: cur_album=obj.task.cur_album, old_paths=old_paths, ) + ctx.to_cache[id(obj)] = model + + model.candidates = [ + self.candidate_mapper.to_db(c, ctx) for c in obj.candidate_states + ] + return model class CandidateStateMapper(DBMapper[CandidateState, CandidateStateInDb]): @@ -148,4 +186,5 @@ def _to_db(self, obj: CandidateState, ctx: Context) -> CandidateStateInDb: id=obj.id, match=self.match_mapper.to_db(obj.match, ctx), duplicate_ids=obj.duplicate_ids, + task=self.task_mapper.to_db(obj.task_state, ctx), ) diff --git a/backend/beets_flask/database/models/states.py b/backend/beets_flask/database/models/states.py index bf7afee2..d1a2e8d5 100644 --- a/backend/beets_flask/database/models/states.py +++ b/backend/beets_flask/database/models/states.py @@ -405,12 +405,14 @@ class CandidateStateInDb(Base): def __init__( self, match: Match, + task: TaskStateInDb, duplicate_ids: list[str] = [], id: str | None = None, ): super().__init__(id) self.match = match + self.task = task self.duplicate_ids = ";".join(map(str, duplicate_ids)) diff --git a/backend/tests/integration/test_flows.py b/backend/tests/integration/test_flows.py index 484aa3f4..15ceaf1f 100644 --- a/backend/tests/integration/test_flows.py +++ b/backend/tests/integration/test_flows.py @@ -512,10 +512,6 @@ async def test_reimport_fails(self, db_session: Session, path: Path): "Database should contain the one preview session state from the previous test" ) - stmt = select(SessionStateInDb) - s_state_indb = db_session.execute(stmt).scalar() - breakpoint() - self.statuses = [] exc = await run_import_candidate( From ea796dbf3d11e5697f587bacf6371072f7e9d99b Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 8 Jun 2026 23:25:11 +0200 Subject: [PATCH 12/12] Added item migration and fixed typing issue. --- ...ba78_added_item_reference_to_trackmatch.py | 88 +++++++++++++++++-- .../unit/test_database/mapper/test_states.py | 1 + 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py b/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py index 12e08591..50e7462d 100644 --- a/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py +++ b/backend/alembic/versions/2026_05_20_2120-25649aa3ba78_added_item_reference_to_trackmatch.py @@ -10,8 +10,11 @@ import sqlalchemy as sa +from beets_flask.logger import logging from alembic import op +log = logging.getLogger("alembic.runtime.migration") + # revision identifiers, used by Alembic. revision: str = "25649aa3ba78" @@ -22,14 +25,89 @@ def upgrade() -> None: """Upgrade schema.""" - op.add_column("matches_track", sa.Column("item_id", sa.String(), nullable=False)) - op.create_foreign_key( - "fk_matches_track_items", "matches_track", "items", ["item_id"], ["id"] - ) - op.drop_column("candidate", "mapping") + with op.batch_alter_table("matches_track") as batch_op: + batch_op.add_column(sa.Column("item_id", sa.String(), nullable=False)) + batch_op.create_foreign_key( + "fk_matches_track_items", "items", ["item_id"], ["id"] + ) + with op.batch_alter_table("candidate") as batch_op: + batch_op.drop_column("mapping") + + dedup_items() def downgrade() -> None: """Downgrade schema.""" op.drop_constraint("fk_matches_track_items", "matches_track", type_="foreignkey") op.drop_column("matches_track", "item_id") + + +def dedup_items() -> None: + """Collapse duplicate Item rows created when task.items and + match.mapping keys were separate Python objects during serialization. + Keeps the oldest row per (track, title) and updates all FK refs.""" + conn = op.get_bind() + + items = conn.execute( + sa.text(""" + SELECT id, + json_extract(fixed_values, '$.track') AS track, + json_extract(fixed_values, '$.title') AS title + FROM items + ORDER BY created_at ASC + """) + ).fetchall() + + seen: dict[tuple, str] = {} # (track, title) -> canonical_id + orphan_map: dict[str, str] = {} # orphan_id -> canonical_id + for row in items: + key = (row.track, row.title) + if key in seen: + orphan_map[row.id] = seen[key] + else: + seen[key] = row.id + + if not orphan_map: + log.info("No duplicate Item rows found") + return + + log.info("Deduping %d duplicate Item rows", len(orphan_map)) + + # Batch updates in chunks of 500 to stay under SQLite parameter limits + CHUNK = 500 + items_list = list(orphan_map.items()) + for start in range(0, len(items_list), CHUNK): + chunk = dict(items_list[start : start + CHUNK]) + if start > 0: + log.info("Deduping items %d / %d", start, len(orphan_map)) + + # Build CASE expression with parameters + cases = [] + params: dict[str, str] = {} + for j, (orphan_id, canonical_id) in enumerate(chunk.items()): + params[f"o{j}"] = orphan_id + params[f"c{j}"] = canonical_id + cases.append(f"WHEN :o{j} THEN :c{j}") + case_expr = " ".join(cases) + in_list = ", ".join(f":o{j}" for j in range(len(chunk))) + + conn.execute( + sa.text( + f"UPDATE tasks_items SET item_id = CASE item_id {case_expr} " + f"END WHERE item_id IN ({in_list})" + ), + params, + ) + conn.execute( + sa.text( + f"UPDATE album_match_track_mappings SET item_id = " + f"CASE item_id {case_expr} END WHERE item_id IN ({in_list})" + ), + params, + ) + conn.execute( + sa.text(f"DELETE FROM items WHERE id IN ({in_list})"), + params, + ) + + log.info("Deduped %d duplicate Item rows", len(orphan_map)) diff --git a/backend/tests/unit/test_database/mapper/test_states.py b/backend/tests/unit/test_database/mapper/test_states.py index 53f71a38..165536af 100644 --- a/backend/tests/unit/test_database/mapper/test_states.py +++ b/backend/tests/unit/test_database/mapper/test_states.py @@ -441,6 +441,7 @@ def test_roundtrip_task_with_candidates(self): # ---- to_db ---- model: TaskStateInDb = task_mapper.to_db(task_state, ctx) assert len(model.candidates) == 1 + assert isinstance(model.candidates[0].match, AlbumMatch) assert model.candidates[0].match.info.data["album_id"] == "int-1" # Wire up reverse relationships on the candidate models