Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions src/fromager/bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ def __init__(
self._seen_requirements: set[SeenKey] = set()

self._build_order_filename = self.ctx.work_dir / "build-order.json"
self._stack_filename = self.ctx.work_dir / "bootstrap-stack.json"
Comment thread
dhellmann marked this conversation as resolved.
logger.info("recording bootstrap stack state to %s", self._stack_filename)

# Track failed packages in test mode (list of typed dicts for JSON export)
self.failed_packages: list[FailureRecord] = []
Expand Down Expand Up @@ -371,6 +373,7 @@ def bootstrap(self, req: Requirement, req_type: RequirementType) -> None:

# Main iterative DFS loop
while stack:
self._record_stack_state(stack)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than calling it directly on every iteration, should we call it from a try/except block? The idea is to write the stack only when an exception escapes the loop. That would be more efficient, because we would write only when required instead of writing the entire stack as JSON to disk on every iteration of the DFS loop.

try:
    while stack:
        item = stack.pop()
        self.why = list(item.why_snapshot)

        with req_ctxvar_context(item.req), self._track_why(item):
            try:
                new_items = self._dispatch_phase(item)
            except Exception as err:
                new_items = self._handle_phase_error(item, err)
        # ... rest of loop ...
except Exception:
    self._record_stack_state(stack)
    raise

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It takes a lot less time to write than you might think, and having it available as work is progressing gives you the option of watching what the tool is doing as it works.

item = stack.pop()
self.why = list(item.why_snapshot)

Expand Down Expand Up @@ -1269,6 +1272,41 @@ def _add_to_build_order(
# converted to JSON without help.
json.dump(self._build_stack, f, indent=2, default=str)

def _record_stack_state(self, stack: list[WorkItem]) -> None:
    """Write the current bootstrap stack to `self._stack_filename`.

    Index 0 in the output corresponds to `stack[-1]`, the next item to be
    processed. Each call replaces the previous contents of the file.

    The JSON document is first written to a sibling ``<name>.tmp`` file and
    then renamed over the target. Opening the target directly with mode
    ``"w"`` would truncate it before the new content is written, so anyone
    watching the file while the bootstrap runs could read an empty or
    half-written document, and a failure mid-write would destroy the last
    good snapshot.

    Args:
        stack: Pending work items in DFS order; the last element is the
            next one to be popped.
    """

    def serialize(item: WorkItem) -> dict[str, typing.Any]:
        """Convert one work item into a JSON-friendly dictionary."""
        return {
            "req": str(item.req),
            "req_type": str(item.req_type),
            "phase": str(item.phase),
            "resolved_version": str(item.resolved_version)
            if item.resolved_version is not None
            else None,
            "source_url": item.source_url,
            "build_sdist_only": item.build_sdist_only,
            "why": [
                {"req_type": str(rt), "req": str(r), "version": str(v)}
                for rt, r, v in item.why_snapshot
            ],
            "parent": (
                {"req": str(item.parent[0]), "version": str(item.parent[1])}
                if item.parent
                else None
            ),
            # Dependency sets are unordered; sort them so successive
            # snapshots are stable and easy to diff.
            "build_system_deps": sorted(str(r) for r in item.build_system_deps),
            "build_backend_deps": sorted(str(r) for r in item.build_backend_deps),
            "build_sdist_deps": sorted(str(r) for r in item.build_sdist_deps),
        }

    records = [serialize(item) for item in reversed(stack)]

    target = self._stack_filename
    tmp_filename = target.with_name(target.name + ".tmp")
    with open(tmp_filename, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2, default=str)
    # Rename only after the temp file is complete and closed, so readers see
    # either the old snapshot or the new one, never a partial write.
    tmp_filename.replace(target)
Comment thread
dhellmann marked this conversation as resolved.

# ---- Iterative bootstrap: phase handlers and helpers ----

def _create_unresolved_work_items(
Expand Down
168 changes: 168 additions & 0 deletions tests/test_bootstrapper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import pathlib
import typing
from unittest.mock import Mock, patch

import pytest
Expand Down Expand Up @@ -549,3 +550,170 @@ def test_cache_lookup_no_cache_url_returns_none(tmp_context: WorkContext) -> Non
)

assert result == (None, None)


def _make_resolve_item(
    req: str = "testpkg",
    req_type: RequirementType = RequirementType.TOP_LEVEL,
    why_snapshot: list[tuple[RequirementType, Requirement, Version]] | None = None,
    parent: tuple[Requirement, Version] | None = None,
) -> bootstrapper.WorkItem:
    """Build a RESOLVE-phase ``WorkItem`` for ``req`` with test-friendly defaults.

    A missing (or empty) ``why_snapshot`` is replaced by a fresh empty list.
    """
    requirement = Requirement(req)
    snapshot = why_snapshot if why_snapshot else []
    return bootstrapper.WorkItem(
        req=requirement,
        req_type=req_type,
        phase=bootstrapper.BootstrapPhase.RESOLVE,
        why_snapshot=snapshot,
        parent=parent,
    )


def _record_and_load(
    bt: bootstrapper.Bootstrapper, stack: list[bootstrapper.WorkItem]
) -> list[typing.Any]:
    """Record ``stack`` through ``bt`` and return the parsed JSON it wrote."""
    bt._record_stack_state(stack)
    raw_text = bt._stack_filename.read_text()
    parsed = json.loads(raw_text)
    return typing.cast(list[typing.Any], parsed)


def test_record_stack_state_minimal_item(tmp_context: WorkContext) -> None:
    """A bare RESOLVE item serializes every optional field as None or empty."""
    bt = bootstrapper.Bootstrapper(tmp_context)
    entry = _record_and_load(bt, [_make_resolve_item()])[0]

    # Identifying fields.
    assert entry["req"] == "testpkg"
    assert entry["req_type"] == str(RequirementType.TOP_LEVEL)
    assert entry["phase"] == str(bootstrapper.BootstrapPhase.RESOLVE)

    # Optional scalar fields default to JSON null / false.
    for key in ("resolved_version", "source_url", "parent"):
        assert entry[key] is None
    assert entry["build_sdist_only"] is False

    # Collection fields default to empty lists.
    for key in (
        "why",
        "build_system_deps",
        "build_backend_deps",
        "build_sdist_deps",
    ):
        assert entry[key] == []


def test_record_stack_state_full_item(tmp_context: WorkContext) -> None:
    """Every populated field of a BUILD-phase item appears in its JSON record."""
    bt = bootstrapper.Bootstrapper(tmp_context)
    parent = (Requirement("parent-pkg"), Version("2.0"))
    url = "https://pypi.test/child-pkg-1.5.tar.gz"

    full_item = bootstrapper.WorkItem(
        req=Requirement("child-pkg>=1.0"),
        req_type=RequirementType.INSTALL,
        phase=bootstrapper.BootstrapPhase.BUILD,
        why_snapshot=[(RequirementType.INSTALL, parent[0], parent[1])],
        parent=parent,
        resolved_version=Version("1.5"),
        source_url=url,
        build_sdist_only=True,
        build_system_deps={Requirement("setuptools")},
        build_backend_deps={Requirement("wheel")},
        build_sdist_deps={Requirement("flit-core")},
    )

    record = _record_and_load(bt, [full_item])[0]
    expected_parent = {"req": "parent-pkg", "version": "2.0"}

    assert record["resolved_version"] == "1.5"
    assert record["source_url"] == "https://pypi.test/child-pkg-1.5.tar.gz"
    assert record["build_sdist_only"] is True
    assert record["why"] == [
        {"req_type": str(RequirementType.INSTALL), **expected_parent}
    ]
    assert record["parent"] == expected_parent
    assert record["build_system_deps"] == ["setuptools"]
    assert record["build_backend_deps"] == ["wheel"]
    assert record["build_sdist_deps"] == ["flit-core"]


def test_record_stack_state_dep_sets_are_sorted(tmp_context: WorkContext) -> None:
    """Dependency sets given in arbitrary order are emitted alphabetically."""
    bt = bootstrapper.Bootstrapper(tmp_context)
    unordered_deps = {Requirement(name) for name in ("zzz", "aaa", "mmm")}
    work_item = bootstrapper.WorkItem(
        req=Requirement("mypkg"),
        req_type=RequirementType.TOP_LEVEL,
        phase=bootstrapper.BootstrapPhase.BUILD,
        why_snapshot=[],
        build_system_deps=unordered_deps,
    )

    (record,) = _record_and_load(bt, [work_item])[:1]
    assert record["build_system_deps"] == ["aaa", "mmm", "zzz"]


def test_record_stack_state_writes_file(tmp_context: WorkContext) -> None:
    """The stack file is created and holds one list entry per stack item."""
    bt = bootstrapper.Bootstrapper(tmp_context)
    pending = [_make_resolve_item(name) for name in ("pkga", "pkgb")]

    bt._record_stack_state(pending)

    stack_file = bt._stack_filename
    assert stack_file.exists()
    loaded = json.loads(stack_file.read_text())
    assert isinstance(loaded, list)
    assert len(loaded) == 2


def test_record_stack_state_ordering(tmp_context: WorkContext) -> None:
    """Output is reversed: index 0 is stack[-1] (next to pop), last is stack[0]."""
    bt = bootstrapper.Bootstrapper(tmp_context)
    pending = [_make_resolve_item(name) for name in ("pkga", "pkgb", "pkgc")]

    records = _record_and_load(bt, pending)
    top_of_stack, bottom_of_stack = records[0], records[-1]

    assert top_of_stack["req"] == "pkgc"
    assert bottom_of_stack["req"] == "pkga"


def test_record_stack_state_overwrites_each_call(tmp_context: WorkContext) -> None:
    """A later call replaces, rather than appends to, the earlier snapshot."""
    bt = bootstrapper.Bootstrapper(tmp_context)
    stack_file = bt._stack_filename

    two_items = [_make_resolve_item("pkga"), _make_resolve_item("pkgb")]
    bt._record_stack_state(two_items)
    text_after_first = stack_file.read_text()

    one_item = [_make_resolve_item("pkgc")]
    bt._record_stack_state(one_item)
    text_after_second = stack_file.read_text()

    assert text_after_first != text_after_second
    latest = json.loads(text_after_second)
    assert len(latest) == 1
    assert latest[0]["req"] == "pkgc"


def test_bootstrap_calls_record_stack_state(tmp_context: WorkContext) -> None:
    """`bootstrap()` invokes `_record_stack_state` at least once."""
    bt = bootstrapper.Bootstrapper(tmp_context)
    top_level_req = Requirement("testpkg")
    resolved = [("https://pypi.test/testpkg-1.0.tar.gz", Version("1.0"))]

    # `wraps` keeps the real method running (so the file is still written)
    # while the mock counts the calls for us.
    with (
        patch.object(
            bt, "_record_stack_state", wraps=bt._record_stack_state
        ) as recorder,
        patch.object(bt._resolver, "resolve", return_value=resolved),
        patch.object(bt, "_phase_start", return_value=[]),
    ):
        bt.bootstrap(req=top_level_req, req_type=RequirementType.TOP_LEVEL)

    assert recorder.call_count >= 1
Loading