# Commit the freshly generated Android benchmark summary and publish it.
# The push is retried because concurrent workflow runs can advance the remote
# branch between our checkout and our push.
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add benchmarks/android-history/

# Nothing staged means there is nothing to publish; finish the step successfully.
git diff --cached --quiet && exit 0

git commit -m "chore(bench): android benchmark summary ${{ github.sha }}"

max_attempts=5
pushed=false
for attempt in 1 2 3 4 5; do
  if git push; then
    pushed=true
    break
  fi
  # Rebasing after the final attempt would be wasted work: there is no push left to try.
  if [ "$attempt" -lt "$max_attempts" ]; then
    echo "Push attempt $attempt failed — rebasing and retrying..."
    git pull --rebase origin main
  fi
done

# Without this check the step would inherit the exit status of the last
# `git pull --rebase` (normally 0) and report success with nothing pushed.
if [ "$pushed" != true ]; then
  echo "::error::Could not push benchmark summary after $max_attempts attempts"
  exit 1
fi
'?'} pages`, + `Device: API 30 x86_64 emulator — ${pageCountStr} pages`, '', '| Metric | This PR | Baseline | Delta |', '|--------|---------|----------|-------|', @@ -170,7 +178,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, }); const existing = comments.find(c => - c.user.login === 'github-actions[bot]' && c.body.includes('## Android Benchmark Results')); + c.user.login === 'github-actions[bot]' && (c.body.includes('## Android Load Benchmark') || c.body.includes('## Android Benchmark Results'))); if (existing) { await github.rest.issues.updateComment({ owner: context.repo.owner, repo: context.repo.repo, comment_id: existing.id, body, diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 5c9848f..01a3446 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -19,6 +19,7 @@ jobs: benchmark: name: Load Benchmark runs-on: ubuntu-latest + timeout-minutes: 30 if: github.event.pull_request.draft == false steps: @@ -146,11 +147,14 @@ jobs: ? `\n![Allocation flamegraph](${imageUrl})\n` : '\n_Flamegraph not available_\n'; + const pageCountStr = curr.pageCount != null ? 
curr.pageCount : 'n/a'; const body = [ - '## Benchmark Results', + '## JVM Load Benchmark (Desktop)', + '', + '_Synthetic in-memory benchmark measuring load performance for the desktop (JVM) app._', '', `Comparing \`${curr.gitSha}\` (this PR) vs \`${base.gitSha}\` (baseline) `, - `Graph config: \`${curr.graphConfig || 'SMALL'}\` — ${curr.pageCount || '?'} pages`, + `Graph config: \`${curr.graphConfig || 'SMALL'}\` — ${pageCountStr} pages`, '', '| Metric | This PR | Baseline | Delta |', '|--------|---------|----------|-------|', @@ -174,7 +178,7 @@ jobs: const { data: comments } = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, }); - const existing = comments.find(c => c.user.login === 'github-actions[bot]' && c.body.includes('## Benchmark Results')); + const existing = comments.find(c => c.user.login === 'github-actions[bot]' && (c.body.includes('## JVM Load Benchmark') || c.body.includes('## Benchmark Results'))); if (existing) { await github.rest.issues.updateComment({ owner: context.repo.owner, repo: context.repo.repo, comment_id: existing.id, body, diff --git a/docs/bugs/open/BUG-002-benchmark-jobs-skip-on-push-to-main.md b/docs/bugs/open/BUG-002-benchmark-jobs-skip-on-push-to-main.md new file mode 100644 index 0000000..208ca51 --- /dev/null +++ b/docs/bugs/open/BUG-002-benchmark-jobs-skip-on-push-to-main.md @@ -0,0 +1,53 @@ +# BUG-002: Benchmark Workflow Jobs Silently Skip on Push to Main [SEVERITY: High] + +**Status**: 🐛 Open +**Discovered**: 2026-04-26 +**Impact**: Benchmark history is never committed on push to `main`. The `android-benchmark` and `benchmark` workflows trigger on push but the job-level `if` condition silently evaluates to `false`, causing both jobs to be skipped every time. As a result, `benchmarks/history/` and `benchmarks/android-history/` are never updated on main, and the PR comparison baseline is always stale or missing. 
+ +## Problem Description + +Both `.github/workflows/benchmark.yml` and `.github/workflows/android-benchmark.yml` use the following job-level condition to exclude draft PRs: + +```yaml +if: github.event.pull_request.draft == false +``` + +On **push** events (not pull_request events), `github.event.pull_request` is an empty object and `.draft` resolves to `null`. In GitHub Actions expression evaluation, `null == false` is `false`, so the job is skipped entirely. The workflows are configured to trigger on both `push` (branches: [main]) and `pull_request`, but the jobs only ever run on non-draft pull_request events. + +## Reproduction Steps + +1. Merge a PR into `main` (triggers a push event on main). +2. Observe the "Android Benchmark" and "Load Benchmark" workflow runs in the Actions tab. +3. Expected: Both jobs run, collect results, and commit benchmark JSON files to `benchmarks/history/` and `benchmarks/android-history/`. +4. Actual: Both jobs show as "skipped" — the `if` condition evaluates to `false` because `github.event.pull_request.draft` is `null` on a push event. + +## Root Cause + +The `if` guard intended to skip draft PRs (`github.event.pull_request.draft == false`) has a type-coercion side effect: when there is no PR context (push events), `.draft` is `null`, and GitHub's expression engine evaluates `null == false` as `false`. **Needs confirmation:** GitHub's expression documentation describes `==` as a loose comparison that coerces mismatched types to numbers (`null` → `0`, `false` → `0`), under which `null == false` would evaluate to `true`; confirm the actual skip reason from a skipped run before relying on this diagnosis. + +## Files Likely Affected + +- `.github/workflows/benchmark.yml` — line with `if: github.event.pull_request.draft == false` +- `.github/workflows/android-benchmark.yml` — same condition + +## Fix Approach + +Add an explicit push-event guard so the condition only applies to PR events: + +```yaml +if: github.event_name == 'push' || github.event.pull_request.draft == false +``` + +This allows push events through unconditionally and preserves the draft-skip behavior for PR events. + +## Verification + +1. Apply the fix to both workflow files. +2. Merge a commit to `main`. +3. Confirm both benchmark jobs run (not skipped) in the Actions tab. +4. 
Confirm new JSON files appear in `benchmarks/history/` and `benchmarks/android-history/` in the repository after the run. + +## Related Tasks + +- Discovered during PR #35 (`stelekit-action-failing` branch) CI review +- `.github/workflows/benchmark.yml` and `android-benchmark.yml` both need the same one-line fix diff --git a/kmp/build.gradle.kts b/kmp/build.gradle.kts index fcd9a8d..dced8ba 100644 --- a/kmp/build.gradle.kts +++ b/kmp/build.gradle.kts @@ -341,8 +341,10 @@ tasks.register("jvmTestProfile") { classpath = tasks.named("jvmTest").get().classpath testClassesDirs = tasks.named("jvmTest").get().testClassesDirs - val graphPath = (project.findProperty("graphPath") as? String).orEmpty() - systemProperty("STELEKIT_GRAPH_PATH", graphPath) + val graphPath = (project.findProperty("graphPath") as? String).orEmpty() + val benchConfig = (project.findProperty("benchConfig") as? String) ?: "XLARGE" + systemProperty("STELEKIT_GRAPH_PATH", graphPath) + systemProperty("STELEKIT_BENCH_CONFIG", benchConfig) systemProperty("benchmark.output.dir", layout.buildDirectory.dir("reports").get().asFile.absolutePath) filter { @@ -454,6 +456,28 @@ print(out_file) } } +// ── library stats ("Spotify Wrapped" for your knowledge graph) ───────────── +// Usage: ./gradlew :kmp:graphStats -PgraphPath=/path/to/your/logseq +tasks.register("graphStats") { + group = "verification" + description = "Print library stats. Usage: -PgraphPath=/your/logseq" + + classpath = tasks.named("jvmTest").get().classpath + testClassesDirs = tasks.named("jvmTest").get().testClassesDirs + + val graphPath = (project.findProperty("graphPath") as? 
/**
 * A snapshot of a graph's size, connectivity, growth, and link topology.
 * Computed by GraphStatsCollector from raw markdown files — no SQLite required.
 * Intended for use by both the CLI tool and the in-app Library Stats screen.
 *
 * All collection-typed properties carry explicit type arguments; Kotlin has no
 * raw types, so a bare `Map` or `List` does not compile.
 */
data class GraphStatsReport(
    val graphPath: String,

    // ── Volume ────────────────────────────────────────────────────────────
    val pageCount: Int,
    val journalCount: Int,
    val totalBlocks: Int,
    val pagesWithNoContent: Int,

    // ── Link topology ─────────────────────────────────────────────────────
    val totalOutgoingLinks: Int,
    val totalHashtags: Int,
    /** Pages that reference at least one other page. */
    val pagesWithOutgoingLinks: Int,
    /** Pages that are referenced by at least one other page. */
    val pagesWithIncomingLinks: Int,
    val avgOutgoingLinksPerPage: Float,
    val avgIncomingLinksPerPage: Float,
    val maxIncomingLinks: Int,
    val maxOutgoingLinks: Int,
    /** Fraction of blocks that contain at least one [[link]]. Equivalent to SyntheticGraphGenerator.Config.linkDensity. */
    val blockLinkDensity: Float,
    /** Distribution: link count (capped at 20 for display) → number of pages with that many incoming links. */
    val incomingLinkHistogram: Map<Int, Int>,
    /** Distribution: link count (capped at 20 for display) → number of pages with that many outgoing links. */
    val outgoingLinkHistogram: Map<Int, Int>,
    /** Top 15 pages ranked by incoming link count. */
    val topByIncomingLinks: List<PageConnectivity>,
    /** Top 15 pages ranked by outgoing link count. */
    val topByOutgoingLinks: List<PageConnectivity>,

    // ── Time span ─────────────────────────────────────────────────────────
    /** ISO date string "YYYY-MM-DD", or null if no dated journals found. */
    val firstJournalDate: String?,
    /** ISO date string "YYYY-MM-DD", or null if no dated journals found. */
    val lastJournalDate: String?,
    /** Distinct days with journal entries. */
    val journalDays: Int,
    /** Calendar days between first and last journal. */
    val journalSpanDays: Int,
    /** journalDays / journalSpanDays — fraction of days that have a journal entry. */
    val journalFillRate: Float,

    // ── Density ───────────────────────────────────────────────────────────
    val avgBlocksPerPage: Float,

    // ── Growth over time ──────────────────────────────────────────────────
    /** "YYYY" → count of journals in that year. */
    val journalsByYear: Map<String, Int>,
    /** "YYYY-MM" → count of journals in that month. */
    val journalsByMonth: Map<String, Int>,

    // ── Namespaces ────────────────────────────────────────────────────────
    val topNamespaces: List<NamespaceStat>,

    // ── Benchmark targets ─────────────────────────────────────────────────
    /** Suggested SyntheticGraphGenerator.Config values calibrated to 2× this library. */
    val benchmarkTargets: BenchmarkTargets,
)
= null +} diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt index ed1357d..af7bdd8 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/App.kt @@ -71,6 +71,10 @@ import dev.stapler.stelekit.ui.i18n.t import dev.stapler.stelekit.ui.onboarding.Onboarding import dev.stapler.stelekit.ui.screens.AllPagesScreen import dev.stapler.stelekit.ui.screens.AllPagesViewModel +import dev.stapler.stelekit.ui.screens.LibraryStatsScreen +import dev.stapler.stelekit.ui.screens.LibraryStatsViewModel +import dev.stapler.stelekit.stats.LibraryStatsProvider +import dev.stapler.stelekit.stats.NoOpLibraryStatsProvider import dev.stapler.stelekit.ui.screens.GlobalUnlinkedReferencesScreen import dev.stapler.stelekit.ui.screens.JournalsView import dev.stapler.stelekit.ui.screens.JournalsViewModel @@ -107,6 +111,7 @@ fun StelekitApp( pluginHost: PluginHost = remember { PluginHost() }, encryptionManager: EncryptionManager = remember { DefaultEncryptionManager() }, urlFetcher: UrlFetcher = remember { NoOpUrlFetcher() }, + libraryStatsProvider: LibraryStatsProvider = NoOpLibraryStatsProvider, voicePipeline: VoicePipelineConfig = remember { VoicePipelineConfig() }, voiceSettings: VoiceSettings? = null, onRebuildVoicePipeline: (() -> Unit)? 
= null, @@ -240,6 +245,7 @@ fun StelekitApp( graphManager = graphManager, notificationManager = notificationManager, urlFetcher = urlFetcher, + libraryStatsProvider = libraryStatsProvider, voicePipeline = voicePipeline, voiceSettings = voiceSettings, onRebuildVoicePipeline = onRebuildVoicePipeline, @@ -267,6 +273,7 @@ private fun GraphContent( graphManager: GraphManager, notificationManager: NotificationManager, urlFetcher: UrlFetcher = NoOpUrlFetcher(), + libraryStatsProvider: LibraryStatsProvider = NoOpLibraryStatsProvider, voicePipeline: VoicePipelineConfig = VoicePipelineConfig(), voiceSettings: VoiceSettings? = null, onRebuildVoicePipeline: (() -> Unit)? = null, @@ -438,6 +445,9 @@ private fun GraphContent( val allPagesViewModel = remember { AllPagesViewModel(repos.pageRepository, repos.blockRepository) } + val libraryStatsViewModel = remember { + LibraryStatsViewModel(libraryStatsProvider, graphManager.getActiveGraphInfo()?.path ?: "") + } val searchViewModel = remember { SearchViewModel(repos.searchRepository) } @@ -450,6 +460,7 @@ private fun GraphContent( blockStateManager.close() journalsViewModel.close() allPagesViewModel.close() + libraryStatsViewModel.close() searchViewModel.close() voiceCaptureViewModel.close() viewModel.close() @@ -632,6 +643,7 @@ private fun GraphContent( blockStateManager = blockStateManager, journalsViewModel = journalsViewModel, allPagesViewModel = allPagesViewModel, + libraryStatsViewModel = libraryStatsViewModel, viewModel = viewModel, searchViewModel = searchViewModel, notificationManager = notificationManager, @@ -751,6 +763,7 @@ private fun ScreenRouter( blockStateManager: dev.stapler.stelekit.ui.state.BlockStateManager, journalsViewModel: JournalsViewModel, allPagesViewModel: AllPagesViewModel, + libraryStatsViewModel: LibraryStatsViewModel, viewModel: StelekitViewModel, searchViewModel: SearchViewModel, notificationManager: NotificationManager, @@ -816,6 +829,7 @@ private fun ScreenRouter( onPageClick = { page -> 
viewModel.navigateTo(Screen.PageView(page)) }, onBulkDelete = { uuids -> viewModel.bulkDeletePages(uuids) } ) + is Screen.LibraryStats -> LibraryStatsScreen(viewModel = libraryStatsViewModel) is Screen.Notifications -> { NavigationTracingEffect("Notifications") NotificationHistory(notificationManager) diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/AppState.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/AppState.kt index bc1e3fc..bcf1cbd 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/AppState.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/AppState.kt @@ -20,6 +20,7 @@ sealed class Screen { @HelpPage(docs = AllPagesDocs::class) data object AllPages : Screen() + data object LibraryStats : Screen() data object Notifications : Screen() data object Logs : Screen() data object Performance : Screen() diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/StelekitViewModel.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/StelekitViewModel.kt index a4602e4..7dbf004 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/StelekitViewModel.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/StelekitViewModel.kt @@ -281,10 +281,11 @@ class StelekitViewModel( } fun loadGraph(path: String) { + // Set loading state synchronously so callers observe isFullyLoaded=false immediately, + // eliminating the race where StateFlow.first{isFullyLoaded} catches the initial default. 
+ _uiState.update { it.copy(isLoading = true, isFullyLoaded = false, statusMessage = "Loading graph from $path...") } val job = scope.launch { try { - _uiState.update { it.copy(isLoading = true, isFullyLoaded = false, statusMessage = "Loading graph from $path...") } - var graphExists = fileSystem.directoryExists(path) if (!graphExists) { @@ -660,6 +661,7 @@ class StelekitViewModel( is Screen.Performance -> "Opened Performance" is Screen.GlobalUnlinkedReferences -> "Opened Unlinked References" is Screen.Import -> "Import text as new page" + is Screen.LibraryStats -> "Opened Library Stats" } ) } diff --git a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/Sidebar.kt b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/Sidebar.kt index 6b96171..05bceba 100644 --- a/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/Sidebar.kt +++ b/kmp/src/commonMain/kotlin/dev/stapler/stelekit/ui/components/Sidebar.kt @@ -22,6 +22,7 @@ import androidx.compose.material.icons.filled.Star import androidx.compose.material.icons.filled.StarBorder import androidx.compose.material.icons.filled.Style import androidx.compose.material.icons.filled.Folder +import androidx.compose.material.icons.filled.BarChart import androidx.compose.material.icons.filled.Delete import androidx.compose.material3.* import androidx.compose.runtime.* @@ -123,6 +124,7 @@ fun LeftSidebar( NavigationItem("Journals", Icons.Default.DateRange, currentScreen is Screen.Journals) { onNavigate(Screen.Journals) } NavigationItem("Flashcards", Icons.Default.Style, currentScreen is Screen.Flashcards) { onNavigate(Screen.Flashcards) } NavigationItem("All Pages", Icons.AutoMirrored.Filled.List, currentScreen is Screen.AllPages) { onNavigate(Screen.AllPages) } + NavigationItem("Library Stats", Icons.Default.BarChart, currentScreen is Screen.LibraryStats) { onNavigate(Screen.LibraryStats) } NavigationItem("Unlinked References", Icons.Default.Link, currentScreen is Screen.GlobalUnlinkedReferences) { 
/**
 * Library Stats screen: a title bar with a refresh action, a divider, and a body
 * that mirrors the view model's current [LibraryStatsState].
 *
 * On first composition a scan is requested if the view model is still idle.
 * While a scan is running the refresh button is replaced by a small spinner.
 */
@Composable
fun LibraryStatsScreen(
    viewModel: LibraryStatsViewModel,
    modifier: Modifier = Modifier,
) {
    NavigationTracingEffect("LibraryStats")
    val state by viewModel.state.collectAsState()

    LaunchedEffect(Unit) {
        if (state == LibraryStatsState.Idle) viewModel.load()
    }

    Column(modifier = modifier.fillMaxSize()) {
        // Title bar: heading on the left, spinner-or-refresh on the right.
        Row(
            modifier = Modifier.fillMaxWidth().padding(horizontal = 16.dp, vertical = 12.dp),
            verticalAlignment = Alignment.CenterVertically,
        ) {
            Text("Library Stats", style = MaterialTheme.typography.titleLarge, modifier = Modifier.weight(1f))
            if (state == LibraryStatsState.Loading) {
                CircularProgressIndicator(modifier = Modifier.size(24.dp), strokeWidth = 2.dp)
            } else {
                IconButton(onClick = { viewModel.load() }) {
                    Icon(Icons.Default.Refresh, contentDescription = "Refresh")
                }
            }
        }
        HorizontalDivider()

        // Body: one branch per state.
        when (val current = state) {
            is LibraryStatsState.Loaded -> StatsContent(current.report)
            is LibraryStatsState.Error -> CenteredStatus {
                Text(current.message, color = MaterialTheme.colorScheme.error)
            }
            LibraryStatsState.Loading -> CenteredStatus {
                Column(
                    horizontalAlignment = Alignment.CenterHorizontally,
                    verticalArrangement = Arrangement.spacedBy(12.dp),
                ) {
                    CircularProgressIndicator()
                    Text("Scanning library…", color = MaterialTheme.colorScheme.onSurfaceVariant)
                }
            }
            LibraryStatsState.Idle -> CenteredStatus {
                Text("Tap refresh to scan your library.", color = MaterialTheme.colorScheme.onSurfaceVariant)
            }
        }
    }
}

/** Fills the available space and centres [content] inside it. */
@Composable
private fun CenteredStatus(content: @Composable () -> Unit) {
    Box(Modifier.fillMaxSize(), contentAlignment = Alignment.Center) {
        content()
    }
}
+ } + } + + item { + StatsCard("Link Topology") { + StatRow("Total wiki links", report.totalOutgoingLinks.withCommas()) + StatRow("Block link density", report.blockLinkDensity.pct()) + StatRow("Pages with outgoing links", report.pagesWithOutgoingLinks.withPct(report.pageCount)) + StatRow("Pages with incoming links", report.pagesWithIncomingLinks.withPct(report.pageCount)) + StatRow("Avg outgoing / page", report.avgOutgoingLinksPerPage.fmt1dp()) + StatRow("Avg incoming / page", report.avgIncomingLinksPerPage.fmt1dp()) + StatRow("Max incoming links", report.maxIncomingLinks.withCommas()) + StatRow("Max outgoing links", report.maxOutgoingLinks.withCommas()) + } + } + + if (report.topByIncomingLinks.isNotEmpty()) { + item { + StatsCard("Top Pages by Incoming Links") { + report.topByIncomingLinks.take(10).forEachIndexed { i, p -> + Row(modifier = Modifier.fillMaxWidth().padding(vertical = 2.dp)) { + Text( + "${i + 1}.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.width(28.dp), + ) + Text(p.name.take(36), style = MaterialTheme.typography.bodySmall, modifier = Modifier.weight(1f)) + Text( + "← ${p.incomingLinks.withCommas()}", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.primary, + modifier = Modifier.width(60.dp), + ) + Text( + "→ ${p.outgoingLinks.withCommas()}", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.width(60.dp), + ) + } + } + } + } + } + + if (report.incomingLinkHistogram.isNotEmpty()) { + item { + StatsCard("Incoming Links per Page") { + IntHistogram(report.incomingLinkHistogram, "pages") + } + } + } + + if (report.journalsByYear.isNotEmpty()) { + item { + StatsCard("Journal Activity by Year") { + StatRow("Fill rate", "${(report.journalFillRate * 100).roundToInt()}% of days") + Spacer(Modifier.height(8.dp)) + val maxCount = 
/**
 * Renders one bar per bucket of an integer histogram (bucket value → count).
 *
 * Bucket 0 is always shown. Further buckets are shown up to the largest key
 * present in [histogram], never beyond 20; bucket 20 is labelled "20+".
 * Buckets inside that range with no entry are drawn with a count of 0.
 *
 * @param histogram bucket → number of items in that bucket
 * @param unit suffix printed after each count (e.g. "pages")
 */
@Composable
private fun IntHistogram(histogram: Map<Int, Int>, unit: String) {
    val maxCount = histogram.values.maxOrNull()?.coerceAtLeast(1) ?: 1
    // Computed once instead of on every iteration; the last drawn bucket is the
    // largest key clamped to the 0..20 display range.
    val lastBucket = (histogram.keys.maxOrNull() ?: 0).coerceIn(0, 20)
    for (bucket in 0..lastBucket) {
        val count = histogram[bucket] ?: 0
        BarChartRow(if (bucket == 20) "20+" else "$bucket", count, maxCount, unit)
    }
}
// ── KMP-safe number formatting helpers ──────────────────────────────────────

/**
 * Formats the receiver with a comma between each group of three digits,
 * e.g. `1234567` → `"1,234,567"` and `-1234` → `"-1,234"`.
 */
private fun Int.withCommas(): String {
    // Take the magnitude as a Long: negating Int.MIN_VALUE as an Int overflows
    // back to Int.MIN_VALUE, which previously caused unbounded recursion.
    val wide = toLong()
    val digits = (if (wide < 0) -wide else wide).toString()
    val grouped = StringBuilder(digits.length + digits.length / 3)
    digits.forEachIndexed { index, ch ->
        // A separator goes before every digit that starts a full trailing group of three.
        if (index > 0 && (digits.length - index) % 3 == 0) grouped.append(',')
        grouped.append(ch)
    }
    return if (this < 0) "-$grouped" else grouped.toString()
}

/**
 * Formats the receiver with exactly one decimal place, e.g. `2.34f` → `"2.3"`.
 * Negative values keep a single leading minus sign (`-1.5f` → `"-1.5"`).
 * `roundToInt()` throws `IllegalArgumentException` for NaN, so callers must
 * not pass NaN.
 */
private fun Float.fmt1dp(): String {
    val tenths = (this * 10).roundToInt()
    // Split sign from magnitude so the fractional digit is never rendered as "-5".
    val magnitude = if (tenths < 0) -tenths.toLong() else tenths.toLong()
    val sign = if (tenths < 0) "-" else ""
    return "$sign${magnitude / 10}.${magnitude % 10}"
}

/** Formats a 0..1 fraction as a whole-number percentage, e.g. `0.5f` → `"50%"`. */
private fun Float.pct(): String = "${(this * 100).roundToInt()}%"

/**
 * Formats the receiver followed by its share of [total] as a whole-number
 * percentage, e.g. `5.withPct(20)` → `"5 (25%)"`. A non-positive [total]
 * yields `0%` instead of dividing by zero.
 */
private fun Int.withPct(total: Int): String {
    val share = if (total > 0) (this * 100f / total).roundToInt() else 0
    return "${withCommas()} ($share%)"
}
/**
 * Drives the Library Stats screen: runs a [LibraryStatsProvider] scan for
 * [graphPath] and publishes progress and results through [state].
 *
 * @param provider source of the statistics; the default provider yields no report
 * @param graphPath path handed to the provider on every [load]
 * @param scope coroutine scope scans run in; it is cancelled by [close]
 */
class LibraryStatsViewModel(
    private val provider: LibraryStatsProvider = NoOpLibraryStatsProvider,
    val graphPath: String = "",
    scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Default),
) {
    private val scope = scope
    // Explicit type argument: without it the flow's element type would be inferred
    // as the Idle object alone and no other state could be assigned.
    private val _state = MutableStateFlow<LibraryStatsState>(LibraryStatsState.Idle)
    val state: StateFlow<LibraryStatsState> = _state.asStateFlow()

    /**
     * Starts a scan unless one is already running. The state is switched to
     * [LibraryStatsState.Loading] before this function returns, and ends up as
     * [LibraryStatsState.Loaded] or [LibraryStatsState.Error] when the scan finishes.
     */
    fun load() {
        if (!tryEnterLoading()) return
        scope.launch {
            _state.value = try {
                val report = provider.collect(graphPath)
                if (report != null) {
                    LibraryStatsState.Loaded(report)
                } else {
                    LibraryStatsState.Error("Graph path not found: $graphPath")
                }
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // Cancellation (e.g. via close()) is not a scan failure; let it propagate
                // instead of surfacing it as an Error state.
                throw e
            } catch (e: Throwable) {
                LibraryStatsState.Error(e.message ?: "Scan failed")
            }
        }
    }

    /**
     * Atomically moves the state to Loading.
     *
     * Doing this synchronously (rather than inside the launched coroutine) closes
     * the window in which two quick [load] calls could both pass the guard and
     * start overlapping scans.
     *
     * @return false when a scan is already in progress
     */
    private fun tryEnterLoading(): Boolean {
        while (true) {
            val current = _state.value
            if (current == LibraryStatsState.Loading) return false
            if (_state.compareAndSet(current, LibraryStatsState.Loading)) return true
        }
    }

    /** Cancels the scope, stopping any scan in flight. */
    fun close() { scope.cancel() }
}
/**
 * [LibraryStatsProvider] that scans a graph directory on the local file system
 * using [GraphStatsCollector].
 */
class FileLibraryStatsProvider : LibraryStatsProvider {
    private val collector = GraphStatsCollector()

    /**
     * Collects stats for the graph rooted at [graphPath].
     *
     * @return the report, or null when [graphPath] is not an existing directory
     */
    override suspend fun collect(graphPath: String): GraphStatsReport? =
        File(graphPath)
            .takeIf { candidate -> candidate.isDirectory }
            ?.let { graphDir -> collector.collect(graphDir) }
}
+ private val hashtagRegex = Regex("""#(?:\[\[([^\]]+)]]|(\w[\w\s-]*))""") + // Matches any bullet block line. + private val blockLineRegex = Regex("""^\s*-\s""") + // Journal filename: YYYY_MM_DD or YYYY-MM-DD (with optional .md). + private val journalDateRegex = Regex("""^(\d{4})[-_](\d{2})[-_](\d{2})""") + + private data class FileScan( + val name: String, + val blocks: Int, + val blocksWithLinks: Int, + val outgoing: List, + val hashtags: Int, + ) + + fun collect(graphDir: File): GraphStatsReport { + val pagesDir = File(graphDir, "pages") + val journalsDir = File(graphDir, "journals") + + fun pageNameFromFile(f: File): String = + f.nameWithoutExtension + .replace('%', '/') + .replace('_', ' ') + + fun scan(file: File, name: String): FileScan { + val content = runCatching { file.readText() }.getOrElse { return FileScan(name, 0, 0, emptyList(), 0) } + val lines = content.lines() + var blocks = 0 + var blocksWithLinks = 0 + var hashtags = 0 + val outgoing = mutableListOf() + + for (line in lines) { + if (!blockLineRegex.containsMatchIn(line)) continue + blocks++ + val links = wikiLinkRegex.findAll(line).map { it.groupValues[1].trim() }.filter { it.isNotEmpty() && it != name }.toList() + if (links.isNotEmpty()) { blocksWithLinks++; outgoing += links } + hashtags += hashtagRegex.findAll(line).count() + } + + return FileScan(name, blocks, blocksWithLinks, outgoing.distinct(), hashtags) + } + + // Scan pages + val pages = (pagesDir.listFiles { f -> f.extension == "md" } ?: emptyArray()) + .map { scan(it, pageNameFromFile(it)) } + + // Scan journals, extract dates + data class JournalEntry(val scan: FileScan, val date: String?) 
+ val journals = (journalsDir.listFiles { f -> f.extension == "md" } ?: emptyArray()) + .map { f -> + val m = journalDateRegex.find(f.nameWithoutExtension) + val date = m?.let { "${it.groupValues[1]}-${it.groupValues[2]}-${it.groupValues[3]}" } + JournalEntry(scan(f, pageNameFromFile(f)), date) + } + + // Build incoming link index across pages + journals + val incomingCount = mutableMapOf() + for (s in pages + journals.map { it.scan }) { + for (target in s.outgoing) incomingCount[target] = (incomingCount[target] ?: 0) + 1 + } + + val allScans = pages + journals.map { it.scan } + val totalBlocks = allScans.sumOf { it.blocks } + val totalOutgoing = allScans.sumOf { it.outgoing.size } + val totalHashtags = allScans.sumOf { it.hashtags } + val blocksWithLinks = allScans.sumOf { it.blocksWithLinks } + val blockLinkDensity = if (totalBlocks > 0) blocksWithLinks.toFloat() / totalBlocks else 0f + + // Per-page connectivity + fun connectivity(s: FileScan) = PageConnectivity(s.name, incomingCount[s.name] ?: 0, s.outgoing.size) + + val pageConnectivity = pages.map { connectivity(it) } + + val topByIncoming = pageConnectivity.sortedByDescending { it.incomingLinks }.take(15) + val topByOutgoing = pageConnectivity.sortedByDescending { it.outgoingLinks }.take(15) + + // Histograms — bucket anything > 20 into the "20" bin for display + fun histogram(values: Iterable): Map = + values.groupingBy { it.coerceAtMost(20) }.eachCount().toSortedMap() + + val incomingHistogram = histogram(pageConnectivity.map { it.incomingLinks }) + val outgoingHistogram = histogram(pageConnectivity.map { it.outgoingLinks }) + + // Time span + val datedJournals = journals.mapNotNull { it.date }.sorted() + val firstDate = datedJournals.firstOrNull() + val lastDate = datedJournals.lastOrNull() + val spanDays = if (firstDate != null && lastDate != null) daysBetween(firstDate, lastDate) else 0 + val fillRate = if (spanDays > 0) datedJournals.size.toFloat() / spanDays else 0f + + // Growth + val journalsByYear 
= datedJournals.groupingBy { it.take(4) }.eachCount().toSortedMap() + val journalsByMonth = datedJournals.groupingBy { it.take(7) }.eachCount().toSortedMap() + + // Namespaces (pages whose name contains '/') + val topNamespaces = pages + .filter { '/' in it.name } + .groupingBy { it.name.substringBefore('/') } + .eachCount() + .entries + .sortedByDescending { it.value } + .take(10) + .map { NamespaceStat(it.key, it.value) } + + val avgBlocks = if (pages.isNotEmpty()) totalBlocks.toFloat() / pages.size else 0f + val avgOutgoing = if (pages.isNotEmpty()) totalOutgoing.toFloat() / pages.size else 0f + val avgIncoming = if (pages.isNotEmpty()) incomingCount.values.sum().toFloat() / pages.size else 0f + val maxIncoming = pageConnectivity.maxOfOrNull { it.incomingLinks } ?: 0 + val maxOutgoing = pageConnectivity.maxOfOrNull { it.outgoingLinks } ?: 0 + val pagesWithLinks = pageConnectivity.count { it.outgoingLinks > 0 } + val pagesReferenced = pageConnectivity.count { it.incomingLinks > 0 } + val pagesEmpty = pages.count { it.blocks == 0 } + + // Use p25/p75 so the suggested blocksPerPage range produces a realistic average + // when the generator picks uniformly. p10/p90 are too wide — the library has pages + // with 100+ blocks (synthesis/MOC pages) that would skew the synthetic average up. 
+ val blocksPerPageP10 = percentile(pages.map { it.blocks }, 0.25).coerceAtLeast(1) + val blocksPerPageP90 = percentile(pages.map { it.blocks }, 0.75) + + val targets = BenchmarkTargets( + pageCount = pages.size * 2, + journalCount = journals.size * 2, + linkDensity = (blockLinkDensity * 100).roundToInt() / 100f, + blocksPerPageMin = blocksPerPageP10, + blocksPerPageMax = blocksPerPageP90, + ) + + return GraphStatsReport( + graphPath = graphDir.absolutePath, + pageCount = pages.size, + journalCount = journals.size, + totalBlocks = totalBlocks, + pagesWithNoContent = pagesEmpty, + totalOutgoingLinks = totalOutgoing, + totalHashtags = totalHashtags, + pagesWithOutgoingLinks = pagesWithLinks, + pagesWithIncomingLinks = pagesReferenced, + avgOutgoingLinksPerPage = avgOutgoing, + avgIncomingLinksPerPage = avgIncoming, + maxIncomingLinks = maxIncoming, + maxOutgoingLinks = maxOutgoing, + blockLinkDensity = blockLinkDensity, + incomingLinkHistogram = incomingHistogram, + outgoingLinkHistogram = outgoingHistogram, + topByIncomingLinks = topByIncoming, + topByOutgoingLinks = topByOutgoing, + firstJournalDate = firstDate, + lastJournalDate = lastDate, + journalDays = datedJournals.size, + journalSpanDays = spanDays, + journalFillRate = fillRate, + avgBlocksPerPage = avgBlocks, + journalsByYear = journalsByYear, + journalsByMonth = journalsByMonth, + topNamespaces = topNamespaces, + benchmarkTargets = targets, + ) + } + + private fun percentile(values: List, p: Double): Int { + if (values.isEmpty()) return 0 + val sorted = values.sorted() + return sorted[(sorted.size * p).toInt().coerceIn(0, sorted.size - 1)] + } + + private fun daysBetween(a: String, b: String): Int { + fun epochDay(s: String): Long { + val (y, m, d) = s.split("-").map { it.toInt() } + // Julian Day Number formula + val jdn = (1461L * (y + 4800 + (m - 14) / 12)) / 4 + + (367L * (m - 2 - 12 * ((m - 14) / 12))) / 12 - + (3L * ((y + 4900 + (m - 14) / 12) / 100)) / 4 + d - 32075 + return jdn + } + return 
(epochDay(b) - epochDay(a)).toInt() + } +} diff --git a/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/GraphLoadTimingTest.kt b/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/GraphLoadTimingTest.kt index dc2fbba..57a9bb1 100644 --- a/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/GraphLoadTimingTest.kt +++ b/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/GraphLoadTimingTest.kt @@ -352,7 +352,9 @@ class GraphLoadTimingTest { java.io.File(outputDir, "benchmark-load.json"), mapOf( "graphConfig" to System.getProperty("STELEKIT_BENCH_CONFIG", "SMALL").lowercase(), - "pageCount" to result.pageCount, + // Use generator stats for accurate count — result.pageCount queries getAllPages() + // before the async write actor drains, so it returns 0. + "pageCount" to (stats.pageCount + stats.journalCount), "journalCount" to stats.journalCount, "phase1TtiMs" to result.phase1TtiMs, "phase2Ms" to result.phase2Ms, diff --git a/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/SyntheticGraphGenerator.kt b/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/SyntheticGraphGenerator.kt index 718fcf3..9807db5 100644 --- a/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/SyntheticGraphGenerator.kt +++ b/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/benchmark/SyntheticGraphGenerator.kt @@ -10,16 +10,25 @@ import kotlin.random.Random * link density, and block hierarchy depth. All output is deterministic for a given seed. 
* * Usage: - * val gen = SyntheticGraphGenerator(SyntheticGraphGenerator.DENSE) + * val gen = SyntheticGraphGenerator(SyntheticGraphGenerator.XLARGE) * val stats = gen.generate(outputDir) * // stats.graphDir contains pages/ and journals/ ready for GraphLoader * * Preset configs: - * TINY — 50 pages, 14 journals, sparse links (quick smoke test) - * SMALL — 200 pages, 30 journals, moderate links (CI regression baseline) - * MEDIUM — 500 pages, 90 journals, moderate links (realistic personal library) - * LARGE — 2000 pages, 365 journals, dense links (stress test) - * MESH — 500 pages, 90 journals, 80% link density (worst-case Aho-Corasick) + * TINY — 50 pages, 14 journals, sparse links (quick smoke test) + * SMALL — 200 pages, 30 journals, moderate links (CI regression baseline) + * MEDIUM — 500 pages, 90 journals, moderate links (realistic small library) + * LARGE — 2000 pages, 365 journals, dense links (stress test) + * XLARGE — 7978 pages, 2930 journals, power-law topology (2× a real measured library) + * MESH — 500 pages, 90 journals, 80% link density (worst-case Aho-Corasick) + * + * Hub pages (hubFraction > 0): + * A fraction of pages are designated "hubs" — heavily-referenced pages like index + * pages, MOCs, or frequently-linked concepts. They are picked as link targets with + * hubLinkWeight× higher probability than leaf pages, and themselves contain more + * blocks with higher outgoing link density. This mirrors real personal libraries + * where a few pages (e.g. "Programming", "Health", "MOC-Projects") accumulate + * hundreds of backlinks. */ class SyntheticGraphGenerator(val config: Config = Config()) { @@ -36,6 +45,14 @@ class SyntheticGraphGenerator(val config: Config = Config()) { val hierarchyDepth: Int = 3, /** Fraction of pages that use namespace notation ("Area/Topic"). */ val namespaceFraction: Float = 0.15f, + /** + * Fraction of pages designated as "hubs". 
Hub pages are sampled as link + * targets with [hubLinkWeight]× higher probability than leaf pages, and + * contain more blocks with higher outgoing link density. + */ + val hubFraction: Float = 0.0f, + /** Relative weight of hub pages vs. leaf pages in link-target selection. */ + val hubLinkWeight: Float = 10.0f, /** Reproducible output — change to get a different graph shape. */ val seed: Long = 42L, ) @@ -49,11 +66,25 @@ class SyntheticGraphGenerator(val config: Config = Config()) { ) companion object { - val TINY = Config(pageCount = 50, journalCount = 14, linkDensity = 0.05f, blocksPerPage = 2..8, hierarchyDepth = 2) - val SMALL = Config(pageCount = 200, journalCount = 30, linkDensity = 0.20f, blocksPerPage = 3..15, hierarchyDepth = 3) - val MEDIUM = Config(pageCount = 500, journalCount = 90, linkDensity = 0.25f, blocksPerPage = 4..20, hierarchyDepth = 4) - val LARGE = Config(pageCount = 2000, journalCount = 365, linkDensity = 0.35f, blocksPerPage = 3..25, hierarchyDepth = 4) - val MESH = Config(pageCount = 500, journalCount = 90, linkDensity = 0.80f, linksPerBlock = 3..8, blocksPerPage = 4..15, hierarchyDepth = 3) + val TINY = Config(pageCount = 50, journalCount = 14, linkDensity = 0.05f, blocksPerPage = 2..8, hierarchyDepth = 2) + val SMALL = Config(pageCount = 200, journalCount = 30, linkDensity = 0.20f, blocksPerPage = 3..15, hierarchyDepth = 3) + val MEDIUM = Config(pageCount = 500, journalCount = 90, linkDensity = 0.25f, blocksPerPage = 4..20, hierarchyDepth = 4) + val LARGE = Config(pageCount = 2000, journalCount = 365, linkDensity = 0.35f, blocksPerPage = 3..25, hierarchyDepth = 4) + // Calibrated to 2× a real ~4k-page / 6-year personal library + // (measured via ./gradlew :kmp:graphStats -PgraphPath=~/Documents/personal-wiki/logseq). + // Real: 3989 pages, 1465 journals, 0.18 block-link density, p25/p75 blocks/page = 3..63. 
+ val XLARGE = Config( + pageCount = 7_978, + journalCount = 2_930, + linkDensity = 0.18f, + linksPerBlock = 1..8, + blocksPerPage = 3..63, + hierarchyDepth = 4, + namespaceFraction = 0.20f, + hubFraction = 0.05f, + hubLinkWeight = 15.0f, + ) + val MESH = Config(pageCount = 500, journalCount = 90, linkDensity = 0.80f, linksPerBlock = 3..8, blocksPerPage = 4..15, hierarchyDepth = 3) } // ── word pools for generating realistic page names ───────────────────── @@ -113,15 +144,26 @@ class SyntheticGraphGenerator(val config: Config = Config()) { fun generate(outputDir: File): Stats { val rng = Random(config.seed) outputDir.mkdirs() - val pagesDir = File(outputDir, "pages").also { it.mkdirs() } + val pagesDir = File(outputDir, "pages").also { it.mkdirs() } val journalsDir = File(outputDir, "journals").also { it.mkdirs() } val pageNames = buildPageNames(rng) + + // Designate first hubFraction of pages as hubs. Hub pages accumulate far more + // backlinks (they're sampled with hubLinkWeight× higher probability) and also + // have more outgoing links and blocks — mirroring MOC / index pages in real libs. + val hubCount = (config.pageCount * config.hubFraction).toInt() + val hubPages = pageNames.take(hubCount).toHashSet() + + // Pre-build a weighted link pool so link-target selection is O(1) per block + // instead of O(n) (no more full shuffle of allPages per block). 
+ val linkPool = buildLinkPool(pageNames, hubPages) + var totalBlocks = 0 var totalLinks = 0 for (name in pageNames) { - val (content, blocks, links) = generatePageContent(name, pageNames, rng) + val (content, blocks, links) = generatePageContent(name, linkPool, rng, isHub = name in hubPages) totalBlocks += blocks totalLinks += links val fileName = name.replace('/', '%').replace(' ', '_') + ".md" @@ -130,7 +172,7 @@ class SyntheticGraphGenerator(val config: Config = Config()) { for (i in 0 until config.journalCount) { val date = baseDate.minusDays(i) - val (content, blocks, links) = generateJournalContent(date, pageNames, rng) + val (content, blocks, links) = generateJournalContent(date, linkPool, rng) totalBlocks += blocks totalLinks += links val fileName = "${date.year}_${date.month.toString().padStart(2, '0')}_${date.day.toString().padStart(2, '0')}.md" @@ -176,20 +218,56 @@ class SyntheticGraphGenerator(val config: Config = Config()) { return names.take(config.pageCount).toList() } + /** + * Builds a weighted list used for O(1) link-target sampling. + * Hub pages are repeated [hubLinkWeight] times so random index lookups + * naturally pick them with the correct relative frequency. + */ + private fun buildLinkPool(pageNames: List, hubPages: Set): List { + if (hubPages.isEmpty()) return pageNames + val weight = config.hubLinkWeight.toInt().coerceAtLeast(1) + val pool = ArrayList(pageNames.size + hubPages.size * (weight - 1)) + for (name in pageNames) { + if (name in hubPages) repeat(weight) { pool.add(name) } else pool.add(name) + } + return pool + } + private data class GeneratedContent(val markdown: String, val blockCount: Int, val linkCount: Int) private fun generatePageContent( pageName: String, - allPages: List, + linkPool: List, rng: Random, + isHub: Boolean = false, ): GeneratedContent { + // Hub pages get more blocks and higher outgoing-link density to simulate + // MOC/index pages that heavily reference the rest of the graph. 
+ val effectiveBlocksPerPage = if (isHub) + (config.blocksPerPage.last)..(config.blocksPerPage.last * 3) + else + config.blocksPerPage + val effectiveLinkDensity = if (isHub) + (config.linkDensity * 2.5f).coerceAtMost(0.85f) + else + config.linkDensity + val effectiveLinksPerBlock = if (isHub) + config.linksPerBlock.first..(config.linksPerBlock.last * 2).coerceAtMost(20) + else + config.linksPerBlock + val sb = StringBuilder() - val blockCount = config.blocksPerPage.random(rng) + val blockCount = effectiveBlocksPerPage.random(rng) var blocks = 0 var links = 0 repeat(blockCount) { i -> - val (line, lineLinks) = generateBlock(pageName, allPages, rng, indent = 0, position = i) + val (line, lineLinks) = generateBlock( + pageName, linkPool, rng, + indent = 0, position = i, + effectiveLinkDensity = effectiveLinkDensity, + effectiveLinksPerBlock = effectiveLinksPerBlock, + ) sb.append(line) links += lineLinks blocks += 1 @@ -199,7 +277,12 @@ class SyntheticGraphGenerator(val config: Config = Config()) { val childDepth = (1 until minOf(config.hierarchyDepth, 4)).random(rng) val childCount = (1..3).random(rng) repeat(childCount) { j -> - val (child, childLinks) = generateBlock(pageName, allPages, rng, indent = childDepth, position = j) + val (child, childLinks) = generateBlock( + pageName, linkPool, rng, + indent = childDepth, position = j, + effectiveLinkDensity = effectiveLinkDensity, + effectiveLinksPerBlock = effectiveLinksPerBlock, + ) sb.append(child) links += childLinks blocks += 1 @@ -212,7 +295,7 @@ class SyntheticGraphGenerator(val config: Config = Config()) { private fun generateJournalContent( date: SimpleDate, - allPages: List, + linkPool: List, rng: Random, ): GeneratedContent { val sb = StringBuilder() @@ -221,7 +304,7 @@ class SyntheticGraphGenerator(val config: Config = Config()) { var links = 0 repeat(blockCount) { i -> - val (line, lineLinks) = generateBlock("journal-${date}", allPages, rng, indent = 0, position = i) + val (line, lineLinks) = 
generateBlock("journal-${date}", linkPool, rng, indent = 0, position = i) sb.append(line) links += lineLinks blocks += 1 @@ -232,10 +315,12 @@ class SyntheticGraphGenerator(val config: Config = Config()) { private fun generateBlock( sourcePage: String, - allPages: List, + linkPool: List, rng: Random, indent: Int, position: Int, + effectiveLinkDensity: Float = config.linkDensity, + effectiveLinksPerBlock: IntRange = config.linksPerBlock, ): Pair { val prefix = " ".repeat(indent) + "- " val fragment = sentenceFragments.random(rng) @@ -246,12 +331,22 @@ class SyntheticGraphGenerator(val config: Config = Config()) { sb.append(prefix) sb.append("$fragment $filler") - if (rng.nextFloat() < config.linkDensity) { - val linkCount = config.linksPerBlock.random(rng) - val candidates = allPages.filter { it != sourcePage }.shuffled(rng) - for (k in 0 until minOf(linkCount, candidates.size)) { - sb.append(" [[${candidates[k]}]]") - links++ + if (rng.nextFloat() < effectiveLinkDensity) { + val linkCount = effectiveLinksPerBlock.random(rng) + // O(1) random sampling from the pre-built weighted pool. + // Track seen names to avoid duplicates within a block. 
+ val seen = mutableSetOf(sourcePage) + var k = 0 + var attempts = 0 + val maxAttempts = linkCount * 5 + while (k < linkCount && attempts < maxAttempts) { + val candidate = linkPool[rng.nextInt(linkPool.size)] + if (seen.add(candidate)) { + sb.append(" [[${candidate}]]") + links++ + k++ + } + attempts++ } } diff --git a/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/stats/LibraryWrappedTest.kt b/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/stats/LibraryWrappedTest.kt new file mode 100644 index 0000000..0cf5489 --- /dev/null +++ b/kmp/src/jvmTest/kotlin/dev/stapler/stelekit/stats/LibraryWrappedTest.kt @@ -0,0 +1,180 @@ +// Copyright (c) 2026 Tyler Stapler +// SPDX-License-Identifier: Elastic-2.0 + +package dev.stapler.stelekit.stats + +import java.io.File +import kotlin.test.Test + +/** + * Prints a "Spotify Wrapped"-style summary of a Logseq/Stelekit library. + * + * Usage: + * ./gradlew :kmp:graphStats -PgraphPath=/path/to/your/logseq + * + * Outputs: + * - Volume: pages, journals, blocks, links + * - Link topology: incoming/outgoing distribution, top hub pages + * - Journal activity by year (bar chart) + * - Benchmark targets calibrated to 2× this library + */ +class LibraryWrappedTest { + + @Test + fun `library wrapped`() { + val graphPath = System.getProperty("STELEKIT_GRAPH_PATH") + if (graphPath.isNullOrBlank()) { + println("[library-wrapped] SKIPPED — run with: ./gradlew :kmp:graphStats -PgraphPath=/your/logseq") + return + } + val dir = File(graphPath) + require(dir.isDirectory) { "Graph path not found: $graphPath" } + + println("\nScanning $graphPath …") + val report = GraphStatsCollector().collect(dir) + printWrapped(report) + } + + // ── formatting ───────────────────────────────────────────────────────── + + private fun printWrapped(r: GraphStatsReport) { + val w = 66 + val hr = "─".repeat(w) + val eq = "═".repeat(w) + + fun fmt(n: Int) = "%,d".format(n) + fun fmtF(f: Float) = "%.2f".format(f) + fun pct(f: Float) = "%.0f%%".format(f * 100) + fun row(label: 
String, value: String) = println(" %-34s %s".format(label, value)) + + println() + println(eq) + println(" STELEKIT LIBRARY STATS") + println(" ${r.graphPath}") + println(eq) + + // ── Overview line ───────────────────────────────────────────────── + val span = if (r.firstJournalDate != null && r.lastJournalDate != null) + "${r.firstJournalDate} → ${r.lastJournalDate}" + else "(no dated journals)" + println() + println(" ${fmt(r.pageCount)} pages · ${fmt(r.journalCount)} journals · $span") + println() + + // ── Volume ──────────────────────────────────────────────────────── + println(" VOLUME") + println(" $hr") + row("Pages", fmt(r.pageCount)) + row("Journals", fmt(r.journalCount)) + row("Total blocks", fmt(r.totalBlocks)) + row("Avg blocks/page", fmtF(r.avgBlocksPerPage)) + row("Pages with no content", fmt(r.pagesWithNoContent)) + println() + + // ── Link topology ───────────────────────────────────────────────── + println(" LINK TOPOLOGY") + println(" $hr") + row("Total wiki links (outgoing)", fmt(r.totalOutgoingLinks)) + row("Total hashtags", fmt(r.totalHashtags)) + row("Pages with outgoing links", "${fmt(r.pagesWithOutgoingLinks)} (${pct(r.pagesWithOutgoingLinks.toFloat() / r.pageCount.coerceAtLeast(1))})") + row("Pages with incoming links", "${fmt(r.pagesWithIncomingLinks)} (${pct(r.pagesWithIncomingLinks.toFloat() / r.pageCount.coerceAtLeast(1))})") + row("Avg outgoing links/page", fmtF(r.avgOutgoingLinksPerPage)) + row("Avg incoming links/page", fmtF(r.avgIncomingLinksPerPage)) + row("Max incoming links", fmt(r.maxIncomingLinks)) + row("Max outgoing links", fmt(r.maxOutgoingLinks)) + row("Block link density", "${pct(r.blockLinkDensity)} (blocks containing ≥1 link)") + println() + + // ── Outgoing link distribution ──────────────────────────────────── + println(" OUTGOING LINKS PER PAGE (pages grouped by link count)") + println(" $hr") + printHistogram(r.outgoingLinkHistogram, label20 = "20+") + println() + + // ── Incoming link distribution 
──────────────────────────────────── + println(" INCOMING LINKS PER PAGE (how often each page is referenced)") + println(" $hr") + printHistogram(r.incomingLinkHistogram, label20 = "20+") + println() + + // ── Top hub pages ───────────────────────────────────────────────── + println(" TOP PAGES BY INCOMING LINKS (hub / index pages)") + println(" $hr") + r.topByIncomingLinks.forEachIndexed { i, p -> + println(" %2d. %-40s ← %4d → %4d".format(i + 1, p.name.take(40), p.incomingLinks, p.outgoingLinks)) + } + println() + + println(" TOP PAGES BY OUTGOING LINKS (pages that reference the most)") + println(" $hr") + r.topByOutgoingLinks.forEachIndexed { i, p -> + println(" %2d. %-40s → %4d ← %4d".format(i + 1, p.name.take(40), p.outgoingLinks, p.incomingLinks)) + } + println() + + // ── Journal activity ────────────────────────────────────────────── + if (r.journalsByYear.isNotEmpty()) { + println(" JOURNAL ACTIVITY BY YEAR") + println(" $hr") + println(" Fill rate: ${pct(r.journalFillRate)} of days have a journal entry") + println() + val maxCount = r.journalsByYear.values.maxOrNull() ?: 1 + val barWidth = 28 + r.journalsByYear.forEach { (year, count) -> + val filled = (count.toFloat() / maxCount * barWidth).toInt() + val empty = barWidth - filled + val bar = "█".repeat(filled) + "░".repeat(empty) + println(" $year $bar ${fmt(count)}") + } + println() + } + + // ── Namespaces ──────────────────────────────────────────────────── + if (r.topNamespaces.isNotEmpty()) { + println(" TOP NAMESPACES") + println(" $hr") + r.topNamespaces.forEach { ns -> + println(" %-30s ${fmt(ns.count)} pages".format(ns.namespace)) + } + println() + } + + // ── Benchmark targets ───────────────────────────────────────────── + val t = r.benchmarkTargets + println(" BENCHMARK TARGETS (2× this library)") + println(" $hr") + row("Pages", "${fmt(t.pageCount)} (2× ${fmt(r.pageCount)})") + row("Journals", "${fmt(t.journalCount)} (2× ${fmt(r.journalCount)})") + row("Link density", fmtF(t.linkDensity)) + 
row("blocksPerPage", "${t.blocksPerPageMin}..${t.blocksPerPageMax}") + println() + println(" Paste into SyntheticGraphGenerator.XLARGE:") + println() + println(" val XLARGE = Config(") + println(" pageCount = ${t.pageCount},") + println(" journalCount = ${t.journalCount},") + println(" linkDensity = ${t.linkDensity}f,") + println(" blocksPerPage = ${t.blocksPerPageMin}..${t.blocksPerPageMax},") + println(" hubFraction = 0.05f,") + println(" hubLinkWeight = 15.0f,") + println(" )") + println() + + println(eq) + println() + } + + private fun printHistogram(histogram: Map, label20: String) { + if (histogram.isEmpty()) return + val maxCount = histogram.values.maxOrNull() ?: 1 + val barWidth = 24 + for (bucket in 0..20) { + val count = histogram[bucket] ?: 0 + if (count == 0 && bucket > 0 && histogram.keys.maxOrNull() ?: 0 < bucket) break + val label = if (bucket == 20) label20 else "$bucket" + val filled = (count.toFloat() / maxCount * barWidth).toInt() + val bar = "▓".repeat(filled) + println(" %4s %-${barWidth}s %,d pages".format(label, bar, count)) + } + } +}