From 11272dac9de2fafafc282c0368550d21097665d7 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Sun, 10 May 2026 21:40:03 +0200 Subject: [PATCH 01/16] Fix scan not starting with large file sets Removed redundant findMatchingVideoFile call from the run:files_found handler and eliminated O(n^2) DB queries during initial file scan. With thousands of subtitle files, the synchronous forEach loop in the event handler blocked the event loop by calling findMatchingVideoFile (disk I/O) and emitFileUpdate (full table scan per file) for every file before batch processing could begin. Video path matching now only happens in processFile when the file is actually processed, and video_path is stored in the DB at that point. --- src/coordinator.ts | 14 ++++++-------- src/stateManager.ts | 6 ++++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coordinator.ts b/src/coordinator.ts index 9a3b989..2386bc1 100644 --- a/src/coordinator.ts +++ b/src/coordinator.ts @@ -1,7 +1,6 @@ import { ProcessingEngine } from './processingEngine'; import { StateManager } from './stateManager'; import { ScanConfig } from './config'; -import { findMatchingVideoFile } from './findMatchingVideoFile'; import { Run } from './database'; import { once } from 'events'; @@ -33,16 +32,15 @@ export class ProcessingCoordinator { this.engine.on('run:files_found', (files: string[]) => { this.currentRunId = this.stateManager.startRun(files.length, this.enabledEngines); - // Add all files to database as pending - files.forEach((filePath) => { - const videoPath = findMatchingVideoFile(filePath); - this.stateManager.addFile(this.currentRunId!, filePath, videoPath); - }); + // Add all files to database as pending (video matching happens during processing) + for (const filePath of files) { + this.stateManager.addFile(this.currentRunId!, filePath, null); + } }); - this.engine.on('file:started', ({ srtPath }: { srtPath: string }) => { + this.engine.on('file:started', ({ srtPath, videoPath }: { 
srtPath: string; videoPath: string | null }) => { if (this.currentRunId) { - this.stateManager.updateFileStatus(this.currentRunId, srtPath, 'processing', null); + this.stateManager.updateFileStatus(this.currentRunId, srtPath, 'processing', null, videoPath); } }); diff --git a/src/stateManager.ts b/src/stateManager.ts index 8fa533e..d5218d6 100644 --- a/src/stateManager.ts +++ b/src/stateManager.ts @@ -105,14 +105,16 @@ export class StateManager extends EventEmitter { // File management addFile(runId: string, filePath: string, videoPath: string | null): void { this.db.createFileResult(runId, filePath, videoPath); - this.emitFileUpdate(runId, filePath); } - updateFileStatus(runId: string, filePath: string, status: FileResult['status'], currentEngine?: string | null): void { + updateFileStatus(runId: string, filePath: string, status: FileResult['status'], currentEngine?: string | null, videoPath?: string | null): void { const updates: Partial = { status }; if (currentEngine !== undefined) { updates.current_engine = currentEngine; } + if (videoPath !== undefined) { + updates.video_path = videoPath; + } this.db.updateFileResult(runId, filePath, updates); this.emitFileUpdate(runId, filePath); From 08e4a4796d1d1a94fdd94cedaa52ac1d222587ee Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Sun, 10 May 2026 21:50:18 +0200 Subject: [PATCH 02/16] Fix CRLF line endings in entrypoint.sh The script had Windows CRLF line endings, causing the Linux kernel to interpret the shebang as #!/bin/bash\r (with trailing carriage return), resulting in 'exec /entrypoint.sh: no such file or directory' at container startup. Added .gitattributes to enforce LF line endings for shell scripts. 
--- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfdb8b7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.sh text eol=lf From 24cbb5d3597bd46a3c003365862692c789072fe4 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Sun, 10 May 2026 22:18:05 +0200 Subject: [PATCH 03/16] Add OVERWRITE_SUBTITLES mode to fix media player language detection When enabled, the first successful engine result is copied over the original subtitle file and the engine output is cleaned up. This preserves the original filename (e.g. movie.de.srt) so media players correctly detect the language instead of showing 'ffsubsync'. Adds a processed_files table to the database to track which files have already been overwritten, preventing re-processing on subsequent scans. --- src/database.ts | 26 ++++++++++++++++++++++++++ src/processingEngine.ts | 29 ++++++++++++++++++++++++++++- src/stateManager.ts | 8 ++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/database.ts b/src/database.ts index f036868..063157e 100644 --- a/src/database.ts +++ b/src/database.ts @@ -123,6 +123,20 @@ export class SubsyncarrPlusDatabase { this.db.exec(`ALTER TABLE runs ADD COLUMN completed_engines INTEGER DEFAULT 0`); } + // Migration: Create processed_files table + const hasProcessedFiles = this.db + .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='processed_files'") + .all(); + if (hasProcessedFiles.length === 0) { + this.db.exec(` + CREATE TABLE processed_files ( + file_path TEXT PRIMARY KEY, + processed_at INTEGER NOT NULL, + engine TEXT NOT NULL + ); + `); + } + // Migration: Create engine_failure_tracking table const tables = this.db .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='engine_failure_tracking'") @@ -280,6 +294,18 @@ export class SubsyncarrPlusDatabase { .all(runId) as FileResult[]; } + // Processed files methods 
(for overwrite mode) + isFileProcessed(filePath: string): boolean { + const result = this.db.prepare('SELECT 1 FROM processed_files WHERE file_path = ?').get(filePath); + return result !== undefined; + } + + markFileProcessed(filePath: string, engine: string): void { + this.db + .prepare('INSERT OR REPLACE INTO processed_files (file_path, processed_at, engine) VALUES (?, ?, ?)') + .run(filePath, Date.now(), engine); + } + // Engine failure tracking methods getEngineFailureTracking(filePath: string, engine: string): EngineFailureTracking | null { return this.db diff --git a/src/processingEngine.ts b/src/processingEngine.ts index 215aca7..2242353 100644 --- a/src/processingEngine.ts +++ b/src/processingEngine.ts @@ -6,6 +6,8 @@ import { generateFfsubsyncSubtitles } from './generateFfsubsyncSubtitles'; import { generateAutosubsyncSubtitles } from './generateAutosubsyncSubtitles'; import { generateAlassSubtitles } from './generateAlassSubtitles'; import { StateManager } from './stateManager'; +import { basename, dirname, join } from 'path'; +import { copyFileSync, unlinkSync } from 'fs'; export class ProcessingEngine extends EventEmitter { private cancelledFiles: Set = new Set(); @@ -13,6 +15,7 @@ export class ProcessingEngine extends EventEmitter { private enabledEngines: string[]; private logBuffer: string[] = []; private maxLogBufferSize: number; + private overwriteSubtitles: boolean; public stateManager?: StateManager; constructor() { @@ -20,6 +23,7 @@ export class ProcessingEngine extends EventEmitter { this.maxConcurrent = parseInt(process.env.MAX_CONCURRENT_SYNC_TASKS || '1', 10); this.enabledEngines = process.env.INCLUDE_ENGINES?.split(',') || ['ffsubsync', 'autosubsync', 'alass']; this.maxLogBufferSize = parseInt(process.env.LOG_BUFFER_SIZE || '1000', 10); + this.overwriteSubtitles = process.env.OVERWRITE_SUBTITLES === 'true'; } private log(message: string): void { @@ -50,7 +54,16 @@ export class ProcessingEngine extends EventEmitter { const srtFiles = await 
findAllSrtFiles(scanConfig); this.log(`[${new Date().toISOString()}] Found ${srtFiles.length} subtitle files`); - this.emit('run:files_found', srtFiles); + if (this.overwriteSubtitles && this.stateManager) { + const filtered = srtFiles.filter((f) => !this.stateManager!.isFileProcessed(f)); + const skipped = srtFiles.length - filtered.length; + if (skipped > 0) { + this.log(`[${new Date().toISOString()}] Skipped ${skipped} already-processed subtitle files`); + } + this.emit('run:files_found', filtered); + } else { + this.emit('run:files_found', srtFiles); + } // Process in batches this.log(`[${new Date().toISOString()}] Processing with concurrency: ${this.maxConcurrent}`); @@ -168,6 +181,20 @@ export class ProcessingEngine extends EventEmitter { if (result.success) { anyEngineSucceeded = true; + + if (this.overwriteSubtitles && this.stateManager) { + const engineOutputPath = join(dirname(srtPath), `${basename(srtPath, '.srt')}.${engine}.srt`); + copyFileSync(engineOutputPath, srtPath); + unlinkSync(engineOutputPath); + this.stateManager.markFileProcessed(srtPath, engine); + this.log(`[${new Date().toISOString()}] ✓ Overwritten original: ${fileName}`); + this.emit('file:engine_completed', { + srtPath, + engine, + result: { ...result, duration }, + }); + break; + } } this.emit('file:engine_completed', { diff --git a/src/stateManager.ts b/src/stateManager.ts index d5218d6..122d8d7 100644 --- a/src/stateManager.ts +++ b/src/stateManager.ts @@ -202,6 +202,14 @@ export class StateManager extends EventEmitter { return this.logFileManager.readLog(runId); } + isFileProcessed(filePath: string): boolean { + return this.db.isFileProcessed(filePath); + } + + markFileProcessed(filePath: string, engine: string): void { + this.db.markFileProcessed(filePath, engine); + } + getDatabase(): SubsyncarrPlusDatabase { return this.db; } From 0534c9e569840b48b4b19c6e3e1db6f1b791f73f Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 11:41:26 +0200 Subject: [PATCH 04/16] 
Replace OVERWRITE_SUBTITLES with SUBTITLE_FORMAT option Adds SUBTITLE_FORMAT env var with three modes: - standard (default): file.de.ffsubsync.srt - engine-lang: file.ffsubsync.de.srt (preserves language tag for players) - overwrite: replaces original file in-place Also extracts output path logic into shared getOutputPath helper so generators, scanner, and overwrite logic use consistent paths. --- src/findAllSrtFiles.ts | 20 +++++++++++++------- src/generateAlassSubtitles.ts | 7 ++----- src/generateAutosubsyncSubtitles.ts | 7 ++----- src/generateFfsubsyncSubtitles.ts | 7 ++----- src/helpers.ts | 26 ++++++++++++++++++++++++++ src/processingEngine.ts | 14 ++++++++------ 6 files changed, 53 insertions(+), 28 deletions(-) diff --git a/src/findAllSrtFiles.ts b/src/findAllSrtFiles.ts index a1ad54e..c6b3bba 100644 --- a/src/findAllSrtFiles.ts +++ b/src/findAllSrtFiles.ts @@ -1,18 +1,26 @@ import { readdir } from 'fs/promises'; -import { basename, dirname, extname, join } from 'path'; +import { extname, join } from 'path'; import { existsSync } from 'fs'; import { ScanConfig } from './config'; +import { getOutputPath, getSubtitleFormat } from './helpers'; function isAlreadySynced(srtPath: string, engines: string[]): boolean { - const directory = dirname(srtPath); - const srtBaseName = basename(srtPath, '.srt'); + const format = getSubtitleFormat(); + if (format === 'overwrite') return false; // Handled separately by DB check in engine return engines.every((engine) => { - const outputPath = join(directory, `${srtBaseName}.${engine}.srt`); + const outputPath = getOutputPath(srtPath, engine); return existsSync(outputPath); }); } +function isEngineOutput(filename: string, engines: string[]): boolean { + if (getSubtitleFormat() === 'engine-lang') { + return engines.some((engine) => filename.includes(`.${engine}.`)); + } + return engines.some((engine) => filename.includes(`.${engine}.`)); +} + export async function findAllSrtFiles(config: ScanConfig): Promise { const engines = 
process.env.INCLUDE_ENGINES?.split(',') || ['ffsubsync', 'autosubsync', 'alass']; const files: string[] = []; @@ -34,9 +42,7 @@ export async function findAllSrtFiles(config: ScanConfig): Promise { } else if ( entry.isFile() && extname(entry.name).toLowerCase() === '.srt' && - !entry.name.includes('.ffsubsync.') && - !entry.name.includes('.alass.') && - !entry.name.includes('.autosubsync.') + !isEngineOutput(entry.name, engines) ) { if (isAlreadySynced(fullPath, engines)) { skippedCount++; diff --git a/src/generateAlassSubtitles.ts b/src/generateAlassSubtitles.ts index 7cd01f7..4a96cc6 100644 --- a/src/generateAlassSubtitles.ts +++ b/src/generateAlassSubtitles.ts @@ -1,11 +1,8 @@ -import { basename, dirname, join } from 'path'; -import { execPromise, ProcessingResult } from './helpers'; +import { execPromise, ProcessingResult, getOutputPath } from './helpers'; import { existsSync } from 'fs'; export async function generateAlassSubtitles(srtPath: string, videoPath: string): Promise { - const directory = dirname(srtPath); - const srtBaseName = basename(srtPath, '.srt'); - const outputPath = join(directory, `${srtBaseName}.alass.srt`); + const outputPath = getOutputPath(srtPath, 'alass'); const exists = existsSync(outputPath); if (exists) { diff --git a/src/generateAutosubsyncSubtitles.ts b/src/generateAutosubsyncSubtitles.ts index 256eda7..532da7d 100644 --- a/src/generateAutosubsyncSubtitles.ts +++ b/src/generateAutosubsyncSubtitles.ts @@ -1,11 +1,8 @@ -import { basename, dirname, join } from 'path'; -import { execPromise, ProcessingResult } from './helpers'; +import { execPromise, ProcessingResult, getOutputPath } from './helpers'; import { existsSync } from 'fs'; export async function generateAutosubsyncSubtitles(srtPath: string, videoPath: string): Promise { - const directory = dirname(srtPath); - const srtBaseName = basename(srtPath, '.srt'); - const outputPath = join(directory, `${srtBaseName}.autosubsync.srt`); + const outputPath = getOutputPath(srtPath, 
'autosubsync'); const exists = existsSync(outputPath); if (exists) { diff --git a/src/generateFfsubsyncSubtitles.ts b/src/generateFfsubsyncSubtitles.ts index 3fccf21..1299903 100644 --- a/src/generateFfsubsyncSubtitles.ts +++ b/src/generateFfsubsyncSubtitles.ts @@ -1,11 +1,8 @@ -import { basename, dirname, join } from 'path'; -import { execPromise, ProcessingResult } from './helpers'; +import { execPromise, ProcessingResult, getOutputPath } from './helpers'; import { existsSync } from 'fs'; export async function generateFfsubsyncSubtitles(srtPath: string, videoPath: string): Promise { - const directory = dirname(srtPath); - const srtBaseName = basename(srtPath, '.srt'); - const outputPath = join(directory, `${srtBaseName}.ffsubsync.srt`); + const outputPath = getOutputPath(srtPath, 'ffsubsync'); // Check if synced subtitle already exists const exists = existsSync(outputPath); diff --git a/src/helpers.ts b/src/helpers.ts index 908da77..4e9f88b 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -1,4 +1,30 @@ import { exec } from 'child_process'; +import { basename, dirname, join } from 'path'; + +export type SubtitleFormat = 'standard' | 'engine-lang' | 'overwrite'; + +export function getSubtitleFormat(): SubtitleFormat { + const format = process.env.SUBTITLE_FORMAT || 'standard'; + if (format === 'engine-lang' || format === 'overwrite') return format; + return 'standard'; +} + +export function getOutputPath(srtPath: string, engine: string): string { + const directory = dirname(srtPath); + const srtBaseName = basename(srtPath, '.srt'); + const format = getSubtitleFormat(); + + if (format === 'engine-lang') { + const match = srtBaseName.match(/\.([a-z]{2,3})(?:\.[a-z]+)*$/i); + if (match) { + const prefix = srtBaseName.slice(0, match.index); + const langPart = match[0]; + return join(directory, `${prefix}.${engine}${langPart}.srt`); + } + } + + return join(directory, `${srtBaseName}.${engine}.srt`); +} export interface ProcessingResult { success: boolean; diff --git 
a/src/processingEngine.ts b/src/processingEngine.ts index 2242353..767208b 100644 --- a/src/processingEngine.ts +++ b/src/processingEngine.ts @@ -6,8 +6,8 @@ import { generateFfsubsyncSubtitles } from './generateFfsubsyncSubtitles'; import { generateAutosubsyncSubtitles } from './generateAutosubsyncSubtitles'; import { generateAlassSubtitles } from './generateAlassSubtitles'; import { StateManager } from './stateManager'; -import { basename, dirname, join } from 'path'; import { copyFileSync, unlinkSync } from 'fs'; +import { getSubtitleFormat, getOutputPath } from './helpers'; export class ProcessingEngine extends EventEmitter { private cancelledFiles: Set = new Set(); @@ -15,7 +15,6 @@ export class ProcessingEngine extends EventEmitter { private enabledEngines: string[]; private logBuffer: string[] = []; private maxLogBufferSize: number; - private overwriteSubtitles: boolean; public stateManager?: StateManager; constructor() { @@ -23,7 +22,10 @@ export class ProcessingEngine extends EventEmitter { this.maxConcurrent = parseInt(process.env.MAX_CONCURRENT_SYNC_TASKS || '1', 10); this.enabledEngines = process.env.INCLUDE_ENGINES?.split(',') || ['ffsubsync', 'autosubsync', 'alass']; this.maxLogBufferSize = parseInt(process.env.LOG_BUFFER_SIZE || '1000', 10); - this.overwriteSubtitles = process.env.OVERWRITE_SUBTITLES === 'true'; + } + + private get subtitleFormat(): string { + return getSubtitleFormat(); } private log(message: string): void { @@ -54,7 +56,7 @@ export class ProcessingEngine extends EventEmitter { const srtFiles = await findAllSrtFiles(scanConfig); this.log(`[${new Date().toISOString()}] Found ${srtFiles.length} subtitle files`); - if (this.overwriteSubtitles && this.stateManager) { + if (this.subtitleFormat === 'overwrite' && this.stateManager) { const filtered = srtFiles.filter((f) => !this.stateManager!.isFileProcessed(f)); const skipped = srtFiles.length - filtered.length; if (skipped > 0) { @@ -182,8 +184,8 @@ export class ProcessingEngine extends 
EventEmitter { if (result.success) { anyEngineSucceeded = true; - if (this.overwriteSubtitles && this.stateManager) { - const engineOutputPath = join(dirname(srtPath), `${basename(srtPath, '.srt')}.${engine}.srt`); + if (this.subtitleFormat === 'overwrite' && this.stateManager) { + const engineOutputPath = getOutputPath(srtPath, engine); copyFileSync(engineOutputPath, srtPath); unlinkSync(engineOutputPath); this.stateManager.markFileProcessed(srtPath, engine); From 26cb26d7bcdaef540da08e03cd6501f86936a6c5 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 11:46:10 +0200 Subject: [PATCH 05/16] Add auto-versioning CI workflow on push to bridgemill-ch branch --- .github/workflows/docker-publish.yml | 45 +++++++++++++++++++++------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index f8cd3b7..c7e47ee 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -2,8 +2,8 @@ name: Build and Publish Docker Image on: push: - tags: - - 'v*.*.*' + branches: + - bridgemill-ch jobs: docker: @@ -24,15 +24,38 @@ jobs: node-version: '18' cache: 'npm' + - name: Determine next version + id: version + run: | + LATEST_TAG=$(git tag --sort=-version:refname | head -1) + if [ -z "$LATEST_TAG" ]; then + echo "version=1.0.0" >> $GITHUB_OUTPUT + else + # Strip leading 'v' if present + CLEAN_TAG="${LATEST_TAG#v}" + IFS='.' read -r MAJOR MINOR PATCH <<< "$CLEAN_TAG" + # If patch isn't a number, restart at .0 + if ! 
[[ "$PATCH" =~ ^[0-9]+$ ]]; then + PATCH=0 + fi + PATCH=$((PATCH + 1)) + echo "version=${MAJOR}.${MINOR}.${PATCH}" >> $GITHUB_OUTPUT + fi + + - name: Create tag + run: | + git config user.name "github-actions" + git config user.email "github-actions@github.com" + git tag ${{ steps.version.outputs.version }} + git push origin ${{ steps.version.outputs.version }} + - name: Docker meta id: meta uses: docker/metadata-action@v5 with: - images: mrorbitman/subsyncarr + images: bridgemill/subsyncarr tags: | - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=semver,pattern={{major}} + type=raw,value=${{ steps.version.outputs.version }} type=raw,value=latest - name: Set up QEMU @@ -57,6 +80,7 @@ jobs: labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max + - name: Generate changelog id: changelog run: | @@ -73,7 +97,8 @@ jobs: - name: Create GitHub Release uses: softprops/action-gh-release@v1 with: - name: Release ${{ github.ref_name }} + name: Release ${{ steps.version.outputs.version }} + tag_name: ${{ steps.version.outputs.version }} body: | ## What's Changed ${{ steps.changelog.outputs.changelog }} @@ -82,11 +107,11 @@ jobs: Pull the image using: ```bash - docker pull mrorbitman/subsyncarr:${{ github.ref_name }} + docker pull bridgemill/subsyncarr:${{ steps.version.outputs.version }} # or - docker pull mrorbitman/subsyncarr:latest + docker pull bridgemill/subsyncarr:latest ``` - Docker Hub URL: https://hub.docker.com/r/mrorbitman/subsyncarr/tags + Docker Hub URL: https://hub.docker.com/r/bridgemill/subsyncarr/tags draft: false prerelease: false From 92511578f181ca36c3f64d711bd0bf3b51543ab4 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:11:40 +0200 Subject: [PATCH 06/16] Fix OOM: avoid loading all file results into memory - Added getFileResult(runId, filePath) direct query to database - Changed emitFileUpdate to query single file instead of loading all - Removed file list from 
WebSocket initial state (was the OOM trigger) - Added limit to GET /api/status file results --- src/database.ts | 6 ++++++ src/server.ts | 11 +++++------ src/stateManager.ts | 3 +-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/database.ts b/src/database.ts index 063157e..a5c94d1 100644 --- a/src/database.ts +++ b/src/database.ts @@ -294,6 +294,12 @@ export class SubsyncarrPlusDatabase { .all(runId) as FileResult[]; } + getFileResult(runId: string, filePath: string): FileResult | null { + return this.db + .prepare('SELECT * FROM file_results WHERE run_id = ? AND file_path = ?') + .get(runId, filePath) as FileResult | null; + } + // Processed files methods (for overwrite mode) isFileProcessed(filePath: string): boolean { const result = this.db.prepare('SELECT 1 FROM processed_files WHERE file_path = ?').get(filePath); diff --git a/src/server.ts b/src/server.ts index c45fb0b..568eb71 100644 --- a/src/server.ts +++ b/src/server.ts @@ -68,9 +68,11 @@ export class SubsyncarrPlusServer { this.app.get('/api/status', (req, res) => { console.log(`[${new Date().toISOString()}] GET /api/status`); const currentRun = this.stateManager.getCurrentRun(); + const limit = Math.min(parseInt(req.query.limit as string, 10) || 1000, 5000); + const files = currentRun ? this.stateManager.getFileResults(currentRun.id).slice(-limit) : []; res.json({ currentRun, - files: currentRun ? this.stateManager.getFileResults(currentRun.id) : [], + files, isRunning: this.coordinator.isRunning(), }); }); @@ -197,9 +199,7 @@ export class SubsyncarrPlusServer { type: 'files:cleared', data: { currentRun, - files: currentRun - ? 
this.stateManager.getFileResults(currentRun.id).filter((f) => f.status === 'processing') - : [], + files: [], }, }); @@ -244,14 +244,13 @@ export class SubsyncarrPlusServer { console.log(`[${new Date().toISOString()}] WebSocket client connected (total: ${this.clients.size + 1})`); this.clients.add(ws); - // Send initial state + // Send initial state (without file list to avoid OOM with large libraries) const currentRun = this.stateManager.getCurrentRun(); ws.send( JSON.stringify({ type: 'state', data: { currentRun, - files: currentRun ? this.stateManager.getFileResults(currentRun.id) : [], isRunning: this.coordinator.isRunning(), }, }), diff --git a/src/stateManager.ts b/src/stateManager.ts index 122d8d7..1be2767 100644 --- a/src/stateManager.ts +++ b/src/stateManager.ts @@ -158,8 +158,7 @@ export class StateManager extends EventEmitter { } private emitFileUpdate(runId: string, filePath: string): void { - const files = this.db.getFileResults(runId); - const file = files.find((f) => f.file_path === filePath); + const file = this.db.getFileResult(runId, filePath); if (file) { const run = this.db.getRun(runId); this.emit('file:updated', { file, run }); From 38867c08a87f6bab6952c946f07917d564eebcc5 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:18:58 +0200 Subject: [PATCH 07/16] Fix OOM on all API endpoints with paginated DB queries - Added getFileResultsPaginated to database (LIMIT/OFFSET query) - Fixed GET /api/runs/:id to load only latest 500 files - Fixed GET /api/status to use paginated query - Added GET /api/runs/:id/files paginated endpoint - Fixed emitFileUpdate uses direct query instead of full table scan - Removed file list from WebSocket initial state --- src/database.ts | 10 +++++++++ src/server.ts | 52 +++++++++++++++++++++++++++++++++------------ src/stateManager.ts | 8 +++++++ 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/database.ts b/src/database.ts index a5c94d1..33c7e78 100644 --- a/src/database.ts 
+++ b/src/database.ts @@ -300,6 +300,16 @@ export class SubsyncarrPlusDatabase { .get(runId, filePath) as FileResult | null; } + getFileResultsPaginated(runId: string, limit: number, offset: number): { files: FileResult[]; total: number } { + const total = ( + this.db.prepare('SELECT COUNT(*) as count FROM file_results WHERE run_id = ?').get(runId) as { count: number } + ).count; + const files = this.db + .prepare('SELECT * FROM file_results WHERE run_id = ? ORDER BY created_at ASC LIMIT ? OFFSET ?') + .all(runId, limit, offset) as FileResult[]; + return { files, total }; + } + // Processed files methods (for overwrite mode) isFileProcessed(filePath: string): boolean { const result = this.db.prepare('SELECT 1 FROM processed_files WHERE file_path = ?').get(filePath); diff --git a/src/server.ts b/src/server.ts index 568eb71..dcbde61 100644 --- a/src/server.ts +++ b/src/server.ts @@ -3,6 +3,7 @@ import { WebSocketServer, WebSocket } from 'ws'; import { createServer } from 'http'; import { ProcessingCoordinator } from './coordinator'; import { StateManager } from './stateManager'; +import { Run } from './database'; import { join } from 'path'; import { getScanConfig } from './config'; import cronstrue from 'cronstrue'; @@ -68,11 +69,12 @@ export class SubsyncarrPlusServer { this.app.get('/api/status', (req, res) => { console.log(`[${new Date().toISOString()}] GET /api/status`); const currentRun = this.stateManager.getCurrentRun(); - const limit = Math.min(parseInt(req.query.limit as string, 10) || 1000, 5000); - const files = currentRun ? this.stateManager.getFileResults(currentRun.id).slice(-limit) : []; + const limit = Math.min(parseInt(req.query.limit as string, 10) || 500, 5000); + const result = currentRun ? 
this.stateManager.getFileResultsPaginated(currentRun.id, limit, 0) : null; res.json({ currentRun, - files, + files: result?.files || [], + totalFiles: result?.total || 0, isRunning: this.coordinator.isRunning(), }); }); @@ -84,31 +86,55 @@ export class SubsyncarrPlusServer { res.json(this.stateManager.getRunHistory(limit)); }); - // Get specific run details + // Get specific run details (latest files only — use paginated endpoint for full list) this.app.get('/api/runs/:id', (req, res) => { console.log(`[${new Date().toISOString()}] GET /api/runs/${req.params.id}`); const currentRun = this.stateManager.getCurrentRun(); const requestedId = req.params.id; - // Check current run first + let run: Run | null = null; if (currentRun && currentRun.id === requestedId) { - return res.json({ - run: currentRun, - files: this.stateManager.getFileResults(currentRun.id), - }); + run = currentRun; + } else { + const history = this.stateManager.getRunHistory(1000); + run = history.find((r) => r.id === requestedId) || null; + } + + if (!run) { + return res.status(404).json({ error: 'Run not found' }); } - // Check history + const limit = Math.min(parseInt(req.query.limit as string, 10) || 500, 5000); + const result = this.stateManager.getFileResultsPaginated(requestedId, limit, 0); + res.json({ + run, + files: result.files, + totalFiles: result.total, + }); + }); + + // Get paginated file results for a run + this.app.get('/api/runs/:id/files', (req, res) => { + console.log(`[${new Date().toISOString()}] GET /api/runs/${req.params.id}/files`); + const requestedId = req.params.id; + const page = Math.max(1, parseInt(req.query.page as string, 10) || 1); + const limit = Math.min(parseInt(req.query.limit as string, 10) || 500, 5000); + const offset = (page - 1) * limit; + + const currentRun = this.stateManager.getCurrentRun(); const history = this.stateManager.getRunHistory(1000); - const run = history.find((r) => r.id === requestedId); + const run = currentRun?.id === requestedId ? 
currentRun : history.find((r) => r.id === requestedId); if (!run) { return res.status(404).json({ error: 'Run not found' }); } + const result = this.stateManager.getFileResultsPaginated(requestedId, limit, offset); res.json({ - run, - files: this.stateManager.getFileResults(run.id), + ...result, + page, + limit, + totalPages: Math.ceil(result.total / limit), }); }); diff --git a/src/stateManager.ts b/src/stateManager.ts index 1be2767..3dbf172 100644 --- a/src/stateManager.ts +++ b/src/stateManager.ts @@ -191,6 +191,14 @@ export class StateManager extends EventEmitter { return this.db.getFileResults(runId); } + getFileResultsPaginated( + runId: string, + limit: number, + offset: number, + ): { files: FileResult[]; total: number } { + return this.db.getFileResultsPaginated(runId, limit, offset); + } + appendLog(runId: string, logMessage: string): void { // Write to log file instead of database this.logFileManager.appendLog(runId, logMessage); From 29b83f8e7ad04ca91a01a5099f172cd05f9ccb40 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:23:55 +0200 Subject: [PATCH 08/16] Fix engine output detection to check all known engines isEngineOutput was only checking currently enabled engines, so old .autosubsync. files were picked up when only alass was enabled. 
--- src/findAllSrtFiles.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/findAllSrtFiles.ts b/src/findAllSrtFiles.ts index c6b3bba..8435591 100644 --- a/src/findAllSrtFiles.ts +++ b/src/findAllSrtFiles.ts @@ -14,11 +14,10 @@ function isAlreadySynced(srtPath: string, engines: string[]): boolean { }); } -function isEngineOutput(filename: string, engines: string[]): boolean { - if (getSubtitleFormat() === 'engine-lang') { - return engines.some((engine) => filename.includes(`.${engine}.`)); - } - return engines.some((engine) => filename.includes(`.${engine}.`)); +const ALL_KNOWN_ENGINES = ['ffsubsync', 'autosubsync', 'alass']; + +function isEngineOutput(filename: string): boolean { + return ALL_KNOWN_ENGINES.some((engine) => filename.includes(`.${engine}.`)); } export async function findAllSrtFiles(config: ScanConfig): Promise { @@ -42,7 +41,7 @@ export async function findAllSrtFiles(config: ScanConfig): Promise { } else if ( entry.isFile() && extname(entry.name).toLowerCase() === '.srt' && - !isEngineOutput(entry.name, engines) + !isEngineOutput(entry.name) ) { if (isAlreadySynced(fullPath, engines)) { skippedCount++; From e2e3d2cccb7158a8ee55e4105f6a545596a47a5a Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:30:51 +0200 Subject: [PATCH 09/16] Auto-normalize engine output filenames to match SUBTITLE_FORMAT When SUBTITLE_FORMAT is changed (e.g. from standard to engine-lang), existing engine output files (.ffsubsync.srt, etc.) are renamed to match the new naming convention before the scan begins. This prevents re-syncing already-processed subtitles and keeps filenames consistent. 
--- src/processingEngine.ts | 56 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/processingEngine.ts b/src/processingEngine.ts index 767208b..9f30d40 100644 --- a/src/processingEngine.ts +++ b/src/processingEngine.ts @@ -6,9 +6,13 @@ import { generateFfsubsyncSubtitles } from './generateFfsubsyncSubtitles'; import { generateAutosubsyncSubtitles } from './generateAutosubsyncSubtitles'; import { generateAlassSubtitles } from './generateAlassSubtitles'; import { StateManager } from './stateManager'; -import { copyFileSync, unlinkSync } from 'fs'; +import { copyFileSync, existsSync, renameSync, unlinkSync } from 'fs'; +import { readdir } from 'fs/promises'; +import { extname, join, basename as pathBasename } from 'path'; import { getSubtitleFormat, getOutputPath } from './helpers'; +const ALL_KNOWN_ENGINES = ['ffsubsync', 'autosubsync', 'alass']; + export class ProcessingEngine extends EventEmitter { private cancelledFiles: Set = new Set(); private maxConcurrent: number; @@ -53,6 +57,10 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] Scanning for subtitle files...`); this.log(`[${new Date().toISOString()}] Scan paths: ${JSON.stringify(scanConfig.includePaths)}`); + if (this.subtitleFormat !== 'standard') { + await this.normalizeEngineOutputs(scanConfig); + } + const srtFiles = await findAllSrtFiles(scanConfig); this.log(`[${new Date().toISOString()}] Found ${srtFiles.length} subtitle files`); @@ -82,6 +90,52 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] All files processed`); } + private async normalizeEngineOutputs(config: ScanConfig): Promise { + const renames: Array<{ current: string; target: string }> = []; + + async function scan(directory: string): Promise { + if (config.excludePaths.some((excludePath) => directory.startsWith(excludePath))) return; + const entries = await readdir(directory, { withFileTypes: 
true }); + for (const entry of entries) { + const fullPath = join(directory, entry.name); + if (entry.isDirectory()) { + await scan(fullPath); + } else if (entry.isFile() && extname(entry.name).toLowerCase() === '.srt') { + for (const engine of ALL_KNOWN_ENGINES) { + const marker = `.${engine}.`; + if (entry.name.includes(marker)) { + const originalName = entry.name.replace(marker, '.'); + const originalPath = join(directory, originalName); + if (existsSync(originalPath)) { + const expectedPath = getOutputPath(originalPath, engine); + if (fullPath !== expectedPath) { + renames.push({ current: fullPath, target: expectedPath }); + } + } + break; + } + } + } + } + } + + for (const includePath of config.includePaths) { + await scan(includePath); + } + + if (renames.length > 0) { + this.log(`[${new Date().toISOString()}] Normalizing ${renames.length} engine output filenames to match SUBTITLE_FORMAT`); + for (const { current, target } of renames) { + try { + renameSync(current, target); + this.log(`[${new Date().toISOString()}] Renamed: ${pathBasename(current)} → ${pathBasename(target)}`); + } catch (err) { + this.log(`[${new Date().toISOString()}] Failed to rename ${pathBasename(current)}: ${err instanceof Error ? err.message : String(err)}`); + } + } + } + } + private async processFile(srtPath: string): Promise { const fileName = srtPath.split('/').pop(); this.log(`[${new Date().toISOString()}] Processing: ${fileName}`); From fbd75f6d37448f8ef4bb19654bc28fc956ec84c4 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:37:24 +0200 Subject: [PATCH 10/16] Replace DB tracking with SRT file header tracking for overwrite mode When SUBTITLE_FORMAT=overwrite, synced subtitles are now marked with a '# synced:' comment at the top of the file instead of relying on engine suffixes in the filename or a database table. This preserves the original filename (e.g. movie.de.srt) so media players correctly detect the language. 
The scanner reads the file header to skip already-synced files. Removed the engine-lang format (was confusing, not the right approach). --- src/database.ts | 26 -------------- src/findAllSrtFiles.ts | 4 +-- src/helpers.ts | 28 +++++++++------ src/processingEngine.ts | 78 ++++------------------------------------- src/stateManager.ts | 8 ----- 5 files changed, 26 insertions(+), 118 deletions(-) diff --git a/src/database.ts b/src/database.ts index 33c7e78..d8165d2 100644 --- a/src/database.ts +++ b/src/database.ts @@ -123,20 +123,6 @@ export class SubsyncarrPlusDatabase { this.db.exec(`ALTER TABLE runs ADD COLUMN completed_engines INTEGER DEFAULT 0`); } - // Migration: Create processed_files table - const hasProcessedFiles = this.db - .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='processed_files'") - .all(); - if (hasProcessedFiles.length === 0) { - this.db.exec(` - CREATE TABLE processed_files ( - file_path TEXT PRIMARY KEY, - processed_at INTEGER NOT NULL, - engine TEXT NOT NULL - ); - `); - } - // Migration: Create engine_failure_tracking table const tables = this.db .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='engine_failure_tracking'") @@ -310,18 +296,6 @@ export class SubsyncarrPlusDatabase { return { files, total }; } - // Processed files methods (for overwrite mode) - isFileProcessed(filePath: string): boolean { - const result = this.db.prepare('SELECT 1 FROM processed_files WHERE file_path = ?').get(filePath); - return result !== undefined; - } - - markFileProcessed(filePath: string, engine: string): void { - this.db - .prepare('INSERT OR REPLACE INTO processed_files (file_path, processed_at, engine) VALUES (?, ?, ?)') - .run(filePath, Date.now(), engine); - } - // Engine failure tracking methods getEngineFailureTracking(filePath: string, engine: string): EngineFailureTracking | null { return this.db diff --git a/src/findAllSrtFiles.ts b/src/findAllSrtFiles.ts index 8435591..9c0114a 100644 --- 
a/src/findAllSrtFiles.ts +++ b/src/findAllSrtFiles.ts @@ -2,11 +2,11 @@ import { readdir } from 'fs/promises'; import { extname, join } from 'path'; import { existsSync } from 'fs'; import { ScanConfig } from './config'; -import { getOutputPath, getSubtitleFormat } from './helpers'; +import { getOutputPath, getSubtitleFormat, isSyncedSrt } from './helpers'; function isAlreadySynced(srtPath: string, engines: string[]): boolean { const format = getSubtitleFormat(); - if (format === 'overwrite') return false; // Handled separately by DB check in engine + if (format === 'overwrite') return isSyncedSrt(srtPath); return engines.every((engine) => { const outputPath = getOutputPath(srtPath, engine); diff --git a/src/helpers.ts b/src/helpers.ts index 4e9f88b..5e33e3b 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -1,29 +1,35 @@ import { exec } from 'child_process'; import { basename, dirname, join } from 'path'; +import { readFileSync, writeFileSync } from 'fs'; -export type SubtitleFormat = 'standard' | 'engine-lang' | 'overwrite'; +export type SubtitleFormat = 'standard' | 'overwrite'; export function getSubtitleFormat(): SubtitleFormat { const format = process.env.SUBTITLE_FORMAT || 'standard'; - if (format === 'engine-lang' || format === 'overwrite') return format; + if (format === 'overwrite') return 'overwrite'; return 'standard'; } export function getOutputPath(srtPath: string, engine: string): string { const directory = dirname(srtPath); const srtBaseName = basename(srtPath, '.srt'); - const format = getSubtitleFormat(); + return join(directory, `${srtBaseName}.${engine}.srt`); +} + +const SYNC_MARKER = '# synced:'; - if (format === 'engine-lang') { - const match = srtBaseName.match(/\.([a-z]{2,3})(?:\.[a-z]+)*$/i); - if (match) { - const prefix = srtBaseName.slice(0, match.index); - const langPart = match[0]; - return join(directory, `${prefix}.${engine}${langPart}.srt`); - } +export function isSyncedSrt(srtPath: string): boolean { + try { + const content = 
readFileSync(srtPath, 'utf8'); + return content.startsWith(SYNC_MARKER); + } catch { + return false; } +} - return join(directory, `${srtBaseName}.${engine}.srt`); +export function markSrtAsSynced(srtPath: string, engine: string, content: string): void { + const marker = `${SYNC_MARKER}${engine} ${Date.now()}\n`; + writeFileSync(srtPath, marker + content, 'utf8'); } export interface ProcessingResult { diff --git a/src/processingEngine.ts b/src/processingEngine.ts index 9f30d40..f607942 100644 --- a/src/processingEngine.ts +++ b/src/processingEngine.ts @@ -6,12 +6,8 @@ import { generateFfsubsyncSubtitles } from './generateFfsubsyncSubtitles'; import { generateAutosubsyncSubtitles } from './generateAutosubsyncSubtitles'; import { generateAlassSubtitles } from './generateAlassSubtitles'; import { StateManager } from './stateManager'; -import { copyFileSync, existsSync, renameSync, unlinkSync } from 'fs'; -import { readdir } from 'fs/promises'; -import { extname, join, basename as pathBasename } from 'path'; -import { getSubtitleFormat, getOutputPath } from './helpers'; - -const ALL_KNOWN_ENGINES = ['ffsubsync', 'autosubsync', 'alass']; +import { readFileSync, unlinkSync } from 'fs'; +import { getSubtitleFormat, getOutputPath, markSrtAsSynced } from './helpers'; export class ProcessingEngine extends EventEmitter { private cancelledFiles: Set = new Set(); @@ -57,23 +53,9 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] Scanning for subtitle files...`); this.log(`[${new Date().toISOString()}] Scan paths: ${JSON.stringify(scanConfig.includePaths)}`); - if (this.subtitleFormat !== 'standard') { - await this.normalizeEngineOutputs(scanConfig); - } - const srtFiles = await findAllSrtFiles(scanConfig); this.log(`[${new Date().toISOString()}] Found ${srtFiles.length} subtitle files`); - - if (this.subtitleFormat === 'overwrite' && this.stateManager) { - const filtered = srtFiles.filter((f) => 
!this.stateManager!.isFileProcessed(f)); - const skipped = srtFiles.length - filtered.length; - if (skipped > 0) { - this.log(`[${new Date().toISOString()}] Skipped ${skipped} already-processed subtitle files`); - } - this.emit('run:files_found', filtered); - } else { - this.emit('run:files_found', srtFiles); - } + this.emit('run:files_found', srtFiles); // Process in batches this.log(`[${new Date().toISOString()}] Processing with concurrency: ${this.maxConcurrent}`); @@ -90,52 +72,6 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] All files processed`); } - private async normalizeEngineOutputs(config: ScanConfig): Promise { - const renames: Array<{ current: string; target: string }> = []; - - async function scan(directory: string): Promise { - if (config.excludePaths.some((excludePath) => directory.startsWith(excludePath))) return; - const entries = await readdir(directory, { withFileTypes: true }); - for (const entry of entries) { - const fullPath = join(directory, entry.name); - if (entry.isDirectory()) { - await scan(fullPath); - } else if (entry.isFile() && extname(entry.name).toLowerCase() === '.srt') { - for (const engine of ALL_KNOWN_ENGINES) { - const marker = `.${engine}.`; - if (entry.name.includes(marker)) { - const originalName = entry.name.replace(marker, '.'); - const originalPath = join(directory, originalName); - if (existsSync(originalPath)) { - const expectedPath = getOutputPath(originalPath, engine); - if (fullPath !== expectedPath) { - renames.push({ current: fullPath, target: expectedPath }); - } - } - break; - } - } - } - } - } - - for (const includePath of config.includePaths) { - await scan(includePath); - } - - if (renames.length > 0) { - this.log(`[${new Date().toISOString()}] Normalizing ${renames.length} engine output filenames to match SUBTITLE_FORMAT`); - for (const { current, target } of renames) { - try { - renameSync(current, target); - this.log(`[${new Date().toISOString()}] Renamed: 
${pathBasename(current)} → ${pathBasename(target)}`); - } catch (err) { - this.log(`[${new Date().toISOString()}] Failed to rename ${pathBasename(current)}: ${err instanceof Error ? err.message : String(err)}`); - } - } - } - } - private async processFile(srtPath: string): Promise { const fileName = srtPath.split('/').pop(); this.log(`[${new Date().toISOString()}] Processing: ${fileName}`); @@ -238,12 +174,12 @@ export class ProcessingEngine extends EventEmitter { if (result.success) { anyEngineSucceeded = true; - if (this.subtitleFormat === 'overwrite' && this.stateManager) { + if (this.subtitleFormat === 'overwrite') { const engineOutputPath = getOutputPath(srtPath, engine); - copyFileSync(engineOutputPath, srtPath); + const engineContent = readFileSync(engineOutputPath, 'utf8'); + markSrtAsSynced(srtPath, engine, engineContent); unlinkSync(engineOutputPath); - this.stateManager.markFileProcessed(srtPath, engine); - this.log(`[${new Date().toISOString()}] ✓ Overwritten original: ${fileName}`); + this.log(`[${new Date().toISOString()}] ✓ Synced (header-marked): ${fileName}`); this.emit('file:engine_completed', { srtPath, engine, diff --git a/src/stateManager.ts b/src/stateManager.ts index 3dbf172..91fc46c 100644 --- a/src/stateManager.ts +++ b/src/stateManager.ts @@ -209,14 +209,6 @@ export class StateManager extends EventEmitter { return this.logFileManager.readLog(runId); } - isFileProcessed(filePath: string): boolean { - return this.db.isFileProcessed(filePath); - } - - markFileProcessed(filePath: string, engine: string): void { - this.db.markFileProcessed(filePath, engine); - } - getDatabase(): SubsyncarrPlusDatabase { return this.db; } From 2bb66e065d8d632916ad0705c6dc1d2114735c7f Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:37:38 +0200 Subject: [PATCH 11/16] Re-add OVERWRITE_SUBTITLES env var Both OVERWRITE_SUBTITLES=true and SUBTITLE_FORMAT=overwrite enable overwrite mode with file-header tracking. 
--- src/helpers.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/helpers.ts b/src/helpers.ts index 5e33e3b..ad7df52 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -6,7 +6,7 @@ export type SubtitleFormat = 'standard' | 'overwrite'; export function getSubtitleFormat(): SubtitleFormat { const format = process.env.SUBTITLE_FORMAT || 'standard'; - if (format === 'overwrite') return 'overwrite'; + if (format === 'overwrite' || process.env.OVERWRITE_SUBTITLES === 'true') return 'overwrite'; return 'standard'; } From 417eb80ef0e1f40fb87e1cccef7bb8acc9d36dfd Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:53:54 +0200 Subject: [PATCH 12/16] Fix: read only 100 bytes for sync marker check isSyncedSrt was reading the entire SRT file for each of 197k files, causing massive blocking I/O. Now reads only first 100 bytes. --- src/helpers.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/helpers.ts b/src/helpers.ts index ad7df52..c97b239 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -1,6 +1,6 @@ import { exec } from 'child_process'; import { basename, dirname, join } from 'path'; -import { readFileSync, writeFileSync } from 'fs'; +import { openSync, readSync, closeSync, writeFileSync } from 'fs'; export type SubtitleFormat = 'standard' | 'overwrite'; @@ -20,8 +20,11 @@ const SYNC_MARKER = '# synced:'; export function isSyncedSrt(srtPath: string): boolean { try { - const content = readFileSync(srtPath, 'utf8'); - return content.startsWith(SYNC_MARKER); + const fd = openSync(srtPath, 'r'); + const buf = Buffer.alloc(100); + readSync(fd, buf, 0, 100, 0); + closeSync(fd); + return buf.toString('utf8').startsWith(SYNC_MARKER); } catch { return false; } From c707d196e88798e57938658d87c00eb2735658e8 Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 12:59:27 +0200 Subject: [PATCH 13/16] Lazy header check: skip sync marker reads during scan Moved isSyncedSrt check out of the 
file scan into processFile so the scan stays fast (only directory traversal + filename filtering). The header check is now async using fs/promises so concurrent batches don't block the event loop. --- src/findAllSrtFiles.ts | 4 ++-- src/helpers.ts | 13 ++++++------- src/processingEngine.ts | 10 ++++++++-- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/findAllSrtFiles.ts b/src/findAllSrtFiles.ts index 9c0114a..5e46e55 100644 --- a/src/findAllSrtFiles.ts +++ b/src/findAllSrtFiles.ts @@ -2,11 +2,11 @@ import { readdir } from 'fs/promises'; import { extname, join } from 'path'; import { existsSync } from 'fs'; import { ScanConfig } from './config'; -import { getOutputPath, getSubtitleFormat, isSyncedSrt } from './helpers'; +import { getOutputPath, getSubtitleFormat } from './helpers'; function isAlreadySynced(srtPath: string, engines: string[]): boolean { const format = getSubtitleFormat(); - if (format === 'overwrite') return isSyncedSrt(srtPath); + if (format === 'overwrite') return false; // Checked lazily in processFile return engines.every((engine) => { const outputPath = getOutputPath(srtPath, engine); diff --git a/src/helpers.ts b/src/helpers.ts index c97b239..d3e594d 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -1,8 +1,7 @@ import { exec } from 'child_process'; import { basename, dirname, join } from 'path'; -import { openSync, readSync, closeSync, writeFileSync } from 'fs'; - -export type SubtitleFormat = 'standard' | 'overwrite'; +import { writeFileSync } from 'fs'; +import { open, read, close } from 'fs/promises'; export function getSubtitleFormat(): SubtitleFormat { const format = process.env.SUBTITLE_FORMAT || 'standard'; @@ -18,12 +17,12 @@ export function getOutputPath(srtPath: string, engine: string): string { const SYNC_MARKER = '# synced:'; -export function isSyncedSrt(srtPath: string): boolean { +export async function isSyncedSrt(srtPath: string): Promise { try { - const fd = openSync(srtPath, 'r'); + const fd = await 
open(srtPath, 'r'); const buf = Buffer.alloc(100); - readSync(fd, buf, 0, 100, 0); - closeSync(fd); + await read(fd, buf, 0, 100, 0); + await close(fd); return buf.toString('utf8').startsWith(SYNC_MARKER); } catch { return false; diff --git a/src/processingEngine.ts b/src/processingEngine.ts index f607942..431eca4 100644 --- a/src/processingEngine.ts +++ b/src/processingEngine.ts @@ -7,7 +7,7 @@ import { generateAutosubsyncSubtitles } from './generateAutosubsyncSubtitles'; import { generateAlassSubtitles } from './generateAlassSubtitles'; import { StateManager } from './stateManager'; import { readFileSync, unlinkSync } from 'fs'; -import { getSubtitleFormat, getOutputPath, markSrtAsSynced } from './helpers'; +import { getSubtitleFormat, getOutputPath, markSrtAsSynced, isSyncedSrt } from './helpers'; export class ProcessingEngine extends EventEmitter { private cancelledFiles: Set = new Set(); @@ -74,7 +74,13 @@ export class ProcessingEngine extends EventEmitter { private async processFile(srtPath: string): Promise { const fileName = srtPath.split('/').pop(); - this.log(`[${new Date().toISOString()}] Processing: ${fileName}`); + + // Skip already-synced files (overwrite mode) + if (this.subtitleFormat === 'overwrite' && (await isSyncedSrt(srtPath))) { + this.log(`[${new Date().toISOString()}] ⊘ Already synced (header): ${fileName}`); + this.emit('file:skipped', { srtPath, reason: 'already_synced' }); + return; + } // Check if cancelled if (this.cancelledFiles.has(srtPath)) { From 107d5ff06b6ba022d4ee7933e9459843bfa191ea Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 13:39:28 +0200 Subject: [PATCH 14/16] Fix: don't skip engine runs based on old .alass.srt files in overwrite mode In overwrite mode, old engine output files from previous standard-format runs were triggering the skip check inside generators. 
Disabled the existsSync check in generators whenever overwrite mode is active (SUBTITLE_FORMAT=overwrite or OVERWRITE_SUBTITLES=true) so the header is the only source of truth for already-synced status. --- src/generateAlassSubtitles.ts | 18 ++++++++++-------- src/generateAutosubsyncSubtitles.ts | 18 ++++++++++-------- src/generateFfsubsyncSubtitles.ts | 19 ++++++++++--------- src/helpers.ts | 8 +++++--- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/src/generateAlassSubtitles.ts b/src/generateAlassSubtitles.ts index 4a96cc6..c95832f 100644 --- a/src/generateAlassSubtitles.ts +++ b/src/generateAlassSubtitles.ts @@ -1,16 +1,18 @@ -import { execPromise, ProcessingResult, getOutputPath } from './helpers'; +import { execPromise, ProcessingResult, getOutputPath, getSubtitleFormat } from './helpers'; import { existsSync } from 'fs'; export async function generateAlassSubtitles(srtPath: string, videoPath: string): Promise { const outputPath = getOutputPath(srtPath, 'alass'); - const exists = existsSync(outputPath); - if (exists) { - return { - success: true, - message: `Skipping ${outputPath} - already processed`, - skipped: true, - }; + if (getSubtitleFormat() !== 'overwrite') { + const exists = existsSync(outputPath); + if (exists) { + return { + success: true, + message: `Skipping ${outputPath} - already processed`, + skipped: true, + }; + } } try { diff --git a/src/generateAutosubsyncSubtitles.ts b/src/generateAutosubsyncSubtitles.ts index 532da7d..fce1806 100644 --- a/src/generateAutosubsyncSubtitles.ts +++ b/src/generateAutosubsyncSubtitles.ts @@ -1,16 +1,18 @@ -import { execPromise, ProcessingResult, getOutputPath } from './helpers'; +import { execPromise, ProcessingResult, getOutputPath, getSubtitleFormat } from './helpers'; import { existsSync } from 'fs'; export async function generateAutosubsyncSubtitles(srtPath: string, videoPath: string): Promise { const outputPath = getOutputPath(srtPath, 'autosubsync'); - const exists = existsSync(outputPath); - if (exists) { - return { - success: true, - message: 
`Skipping ${outputPath} - already processed`, - skipped: true, - }; + if (getSubtitleFormat() !== 'overwrite') { + const exists = existsSync(outputPath); + if (exists) { + return { + success: true, + message: `Skipping ${outputPath} - already processed`, + skipped: true, + }; + } } try { diff --git a/src/generateFfsubsyncSubtitles.ts b/src/generateFfsubsyncSubtitles.ts index 1299903..7cc5696 100644 --- a/src/generateFfsubsyncSubtitles.ts +++ b/src/generateFfsubsyncSubtitles.ts @@ -1,17 +1,18 @@ -import { execPromise, ProcessingResult, getOutputPath } from './helpers'; +import { execPromise, ProcessingResult, getOutputPath, getSubtitleFormat } from './helpers'; import { existsSync } from 'fs'; export async function generateFfsubsyncSubtitles(srtPath: string, videoPath: string): Promise { const outputPath = getOutputPath(srtPath, 'ffsubsync'); - // Check if synced subtitle already exists - const exists = existsSync(outputPath); - if (exists) { - return { - success: true, - message: `Skipping ${outputPath} - already processed`, - skipped: true, - }; + if (getSubtitleFormat() !== 'overwrite') { + const exists = existsSync(outputPath); + if (exists) { + return { + success: true, + message: `Skipping ${outputPath} - already processed`, + skipped: true, + }; + } } try { diff --git a/src/helpers.ts b/src/helpers.ts index d3e594d..00dc0d9 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -1,7 +1,9 @@ import { exec } from 'child_process'; import { basename, dirname, join } from 'path'; import { writeFileSync } from 'fs'; -import { open, read, close } from 'fs/promises'; +import { open } from 'fs/promises'; + +export type SubtitleFormat = 'standard' | 'overwrite'; export function getSubtitleFormat(): SubtitleFormat { const format = process.env.SUBTITLE_FORMAT || 'standard'; @@ -21,8 +23,8 @@ export async function isSyncedSrt(srtPath: string): Promise { try { const fd = await open(srtPath, 'r'); const buf = Buffer.alloc(100); - await read(fd, buf, 0, 100, 0); - await 
close(fd); + await fd.read(buf, 0, 100, 0); + await fd.close(); return buf.toString('utf8').startsWith(SYNC_MARKER); } catch { return false; From 897d592da53fb57b128a236fde6081d478a2e30e Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 14:46:19 +0200 Subject: [PATCH 15/16] feat: add sync retry with offset checking for subtitle quality validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After each sync, calculate the time offset between original and synced subtitles. If offset exceeds threshold (default 5s), retry the sync with the first-synced output as input. If retry also exceeds threshold, mark the subtitle as 'not_fitting' — it likely doesn't match the media. New features: - Subtitle offset calculator (median timestamp shift detection) - Configurable retry via SYNC_RETRY_THRESHOLD_MS and SYNC_MAX_RETRIES - 'not_fitting' status for subtitles that don't fit the media - Web UI shows offset info and not_fitting status with purple styling - Database tracks not_fitting count per run - Unit tests for offset calculator --- public/app.js | 28 +- public/index.html | 1 + public/styles.css | 16 + .../subtitleOffsetCalculator.test.ts | 145 ++++++++ src/config.ts | 16 + src/coordinator.ts | 12 + src/database.ts | 10 +- src/helpers.ts | 4 + src/processingEngine.ts | 341 ++++++++++++++---- src/stateManager.ts | 6 +- src/subtitleOffsetCalculator.ts | 97 +++++ 11 files changed, 595 insertions(+), 81 deletions(-) create mode 100644 src/__tests__/subtitleOffsetCalculator.test.ts create mode 100644 src/subtitleOffsetCalculator.ts diff --git a/public/app.js b/public/app.js index 6757b8c..37c0202 100644 --- a/public/app.js +++ b/public/app.js @@ -438,7 +438,7 @@ class SubsyncarrPlusClient { renderFiles() { const processing = this.state.files.filter((f) => f.status === 'processing'); - const completed = this.state.files.filter((f) => ['completed', 'skipped', 'error'].includes(f.status)); + const completed = 
this.state.files.filter((f) => ['completed', 'skipped', 'error', 'not_fitting'].includes(f.status)); // Render processing files const progressHtml = processing @@ -467,9 +467,12 @@ class SubsyncarrPlusClient { const completedHtml = completed .map((file) => { const engines = JSON.parse(file.engines); + const statusLabel = file.status === 'not_fitting' ? '⚠ Not Fitting' : + file.status === 'error' ? '✗ Error' : + file.status === 'skipped' ? '⊘ Skipped' : '✓ Completed'; return `
-
${this.basename(file.file_path)}
+
${this.basename(file.file_path)} ${statusLabel}
${this.renderEngineResults(engines)}
`; @@ -487,9 +490,17 @@ class SubsyncarrPlusClient { const className = result.success ? 'success' : 'error'; const duration = (result.duration / 1000).toFixed(1); + let extra = ''; + if (result.notFitting) { + extra = ' ⚠ Not fitting'; + } else if (result.offsetMs !== undefined && result.offsetMs !== null) { + const offsetSec = (result.offsetMs / 1000).toFixed(1); + extra = ` (${offsetSec}s)`; + } + return `
- ${icon} ${name} + ${icon} ${name}${extra} ${duration}s
`; @@ -559,6 +570,10 @@ class SubsyncarrPlusClient { run.failed > 0 ? `${run.failed}` : run.failed; + const notFittingCell = + run.not_fitting > 0 + ? `${run.not_fitting}` + : run.not_fitting || 0; return ` @@ -568,6 +583,7 @@ class SubsyncarrPlusClient { ${completedCell} ${skippedCell} ${failedCell} + ${notFittingCell} ${this.renderEngineCell(engineStats.ffsubsync)} ${this.renderEngineCell(engineStats.autosubsync)} ${this.renderEngineCell(engineStats.alass)} @@ -583,7 +599,7 @@ class SubsyncarrPlusClient { .join(''); document.getElementById('historyBody').innerHTML = - html || 'No runs yet'; + html || 'No runs yet'; } basename(path) { @@ -641,6 +657,10 @@ class SubsyncarrPlusClient { files = run.files.filter((f) => f.status === 'error'); title = `Failed Files (${files.length})`; break; + case 'not_fitting': + files = run.files.filter((f) => f.status === 'not_fitting'); + title = `Not Fitting Files (${files.length})`; + break; default: return; } diff --git a/public/index.html b/public/index.html index 58b2547..d8cf2c0 100644 --- a/public/index.html +++ b/public/index.html @@ -76,6 +76,7 @@

Run History

Completed Skipped Failed + Not Fit F Au Al diff --git a/public/styles.css b/public/styles.css index b984e0a..136449f 100644 --- a/public/styles.css +++ b/public/styles.css @@ -281,6 +281,17 @@ h2 { border-left-color: var(--warning); } +.file-card.not_fitting { + border-left-color: #8b5cf6; +} + +.status-label { + font-size: 12px; + font-weight: 500; + opacity: 0.8; + margin-left: 8px; +} + .file-header { display: flex; justify-content: space-between; @@ -370,6 +381,11 @@ td { color: #92400e; } +.status-badge.not_fitting { + background: #ede9fe; + color: #5b21b6; +} + .no-data { color: var(--text-secondary); text-align: center; diff --git a/src/__tests__/subtitleOffsetCalculator.test.ts b/src/__tests__/subtitleOffsetCalculator.test.ts new file mode 100644 index 0000000..242899d --- /dev/null +++ b/src/__tests__/subtitleOffsetCalculator.test.ts @@ -0,0 +1,145 @@ +import { calculateSubtitleOffset } from '../subtitleOffsetCalculator'; + +describe('calculateSubtitleOffset', () => { + it('should return 0 for identical content', () => { + const content = `1 +00:00:20,000 --> 00:00:24,400 +Hello world + +2 +00:01:00,000 --> 00:01:04,000 +Goodbye world`; + + expect(calculateSubtitleOffset(content, content)).toBe(0); + }); + + it('should calculate positive offset when synced subtitles are later', () => { + const original = `1 +00:00:20,000 --> 00:00:24,400 +Hello world + +2 +00:01:00,000 --> 00:01:04,000 +Goodbye world`; + + const synced = `1 +00:00:25,000 --> 00:00:29,400 +Hello world + +2 +00:01:05,000 --> 00:01:09,000 +Goodbye world`; + + // Both entries shifted by 5000ms (5 seconds) + expect(calculateSubtitleOffset(original, synced)).toBe(5000); + }); + + it('should calculate negative offset when synced subtitles are earlier', () => { + const original = `1 +00:00:25,000 --> 00:00:29,400 +Hello world + +2 +00:01:05,000 --> 00:01:09,000 +Goodbye world`; + + const synced = `1 +00:00:20,000 --> 00:00:24,400 +Hello world + +2 +00:01:00,000 --> 00:01:04,000 +Goodbye world`; + 
+ // Both entries shifted by -5000ms + expect(calculateSubtitleOffset(original, synced)).toBe(-5000); + }); + + it('should use median offset to handle outliers', () => { + const original = `1 +00:00:20,000 --> 00:00:24,400 +First + +2 +00:01:00,000 --> 00:01:04,000 +Second + +3 +00:02:00,000 --> 00:02:04,000 +Third`; + + const synced = `1 +00:00:25,000 --> 00:00:29,400 +First + +2 +00:01:05,000 --> 00:01:09,000 +Second + +3 +00:02:30,000 --> 00:02:34,000 +Third`; + + // Offsets: 5000, 5000, 30000 + // Median of [5000, 5000, 30000] = 5000 + expect(calculateSubtitleOffset(original, synced)).toBe(5000); + }); + + it('should return 0 for empty content', () => { + const content = `1 +00:00:20,000 --> 00:00:24,400 +Hello world`; + + expect(calculateSubtitleOffset('', content)).toBe(0); + expect(calculateSubtitleOffset(content, '')).toBe(0); + expect(calculateSubtitleOffset('', '')).toBe(0); + }); + + it('should handle sync marker header lines', () => { + const original = `1 +00:00:20,000 --> 00:00:24,400 +Hello world`; + + const syncedWithMarker = `# synced:ffsubsync 1234567890 +1 +00:00:25,000 --> 00:00:29,400 +Hello world`; + + // Should ignore the sync marker line and calculate offset correctly + expect(calculateSubtitleOffset(original, syncedWithMarker)).toBe(5000); + }); + + it('should handle large offset (30+ seconds)', () => { + const original = `1 +00:00:20,000 --> 00:00:24,400 +Hello world + +2 +00:01:00,000 --> 00:01:04,000 +Goodbye world`; + + const synced = `1 +00:00:50,000 --> 00:00:54,400 +Hello world + +2 +00:01:30,000 --> 00:01:34,000 +Goodbye world`; + + // Both entries shifted by 30000ms (30 seconds) + expect(calculateSubtitleOffset(original, synced)).toBe(30000); + }); + + it('should handle millisecond precision', () => { + const original = `1 +00:00:20,000 --> 00:00:24,400 +Hello world`; + + const synced = `1 +00:00:20,500 --> 00:00:24,900 +Hello world`; + + // Offset of 500ms + expect(calculateSubtitleOffset(original, synced)).toBe(500); + }); +}); \ 
No newline at end of file diff --git a/src/config.ts b/src/config.ts index 5318e85..f97a1f2 100644 --- a/src/config.ts +++ b/src/config.ts @@ -3,6 +3,22 @@ export interface ScanConfig { excludePaths: string[]; } +export interface SyncRetryConfig { + /** Offset threshold in ms that triggers a retry (default: 5000 = 5 seconds) */ + thresholdMs: number; + /** Maximum number of retry attempts after the first sync (default: 1) */ + maxRetries: number; +} + +export function getSyncRetryConfig(): SyncRetryConfig { + const thresholdMs = parseInt(process.env.SYNC_RETRY_THRESHOLD_MS || '5000', 10); + const maxRetries = parseInt(process.env.SYNC_MAX_RETRIES || '1', 10); + return { + thresholdMs: isNaN(thresholdMs) || thresholdMs <= 0 ? 5000 : thresholdMs, + maxRetries: isNaN(maxRetries) || maxRetries < 0 ? 1 : maxRetries, + }; +} + export interface RetentionConfig { keepRunsDays: number; // Keep complete runs for N days trimLogsDays: number; // Trim logs after N days diff --git a/src/coordinator.ts b/src/coordinator.ts index 2386bc1..f8c306b 100644 --- a/src/coordinator.ts +++ b/src/coordinator.ts @@ -66,6 +66,8 @@ export class ProcessingCoordinator { stdout?: string; stderr?: string; skipped?: boolean; + offsetMs?: number; + notFitting?: boolean; }; }) => { if (this.currentRunId) { @@ -102,6 +104,16 @@ export class ProcessingCoordinator { this.stateManager.incrementRunCounter(this.currentRunId, 'failed'); } }); + + this.engine.on( + 'file:not_fitting', + ({ srtPath }: { srtPath: string; engine: string; offsetMs: number }) => { + if (this.currentRunId) { + this.stateManager.updateFileStatus(this.currentRunId, srtPath, 'not_fitting', null); + this.stateManager.incrementRunCounter(this.currentRunId, 'not_fitting'); + } + }, + ); } async startRun(config?: ScanConfig): Promise { diff --git a/src/database.ts b/src/database.ts index d8165d2..e143ed4 100644 --- a/src/database.ts +++ b/src/database.ts @@ -10,6 +10,7 @@ export interface Run { completed: number; skipped: number; 
failed: number; + not_fitting: number; total_engines: number; completed_engines: number; status: 'running' | 'completed' | 'cancelled'; @@ -21,7 +22,7 @@ export interface FileResult { run_id: string; file_path: string; video_path: string | null; - status: 'pending' | 'processing' | 'completed' | 'skipped' | 'error'; + status: 'pending' | 'processing' | 'completed' | 'skipped' | 'error' | 'not_fitting'; current_engine: string | null; engines: string; // JSON stringified { ffsubsync?: {...}, autosubsync?: {...}, alass?: {...} } created_at: number; @@ -81,6 +82,7 @@ export class SubsyncarrPlusDatabase { completed INTEGER DEFAULT 0, skipped INTEGER DEFAULT 0, failed INTEGER DEFAULT 0, + not_fitting INTEGER DEFAULT 0, total_engines INTEGER DEFAULT 0, completed_engines INTEGER DEFAULT 0, status TEXT NOT NULL, @@ -123,6 +125,12 @@ export class SubsyncarrPlusDatabase { this.db.exec(`ALTER TABLE runs ADD COLUMN completed_engines INTEGER DEFAULT 0`); } + // Migration: Add not_fitting column if it doesn't exist + const hasNotFittingColumn = columns.some((col) => col.name === 'not_fitting'); + if (!hasNotFittingColumn) { + this.db.exec(`ALTER TABLE runs ADD COLUMN not_fitting INTEGER DEFAULT 0`); + } + // Migration: Create engine_failure_tracking table const tables = this.db .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='engine_failure_tracking'") diff --git a/src/helpers.ts b/src/helpers.ts index 00dc0d9..e94dbb1 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -42,6 +42,10 @@ export interface ProcessingResult { stdout?: string; stderr?: string; skipped?: boolean; + /** Offset in ms between original and synced subtitles (if calculated) */ + offsetMs?: number; + /** True when the subtitle doesn't fit the media (offset too large even after retry) */ + notFitting?: boolean; } function getTimeoutMs(): number { diff --git a/src/processingEngine.ts b/src/processingEngine.ts index 431eca4..ce2b64a 100644 --- a/src/processingEngine.ts +++ 
b/src/processingEngine.ts @@ -1,13 +1,15 @@ import EventEmitter from 'events'; -import { ScanConfig, getScanConfig } from './config'; +import { ScanConfig, getScanConfig, getSyncRetryConfig } from './config'; import { findAllSrtFiles } from './findAllSrtFiles'; import { findMatchingVideoFile } from './findMatchingVideoFile'; import { generateFfsubsyncSubtitles } from './generateFfsubsyncSubtitles'; import { generateAutosubsyncSubtitles } from './generateAutosubsyncSubtitles'; import { generateAlassSubtitles } from './generateAlassSubtitles'; import { StateManager } from './stateManager'; -import { readFileSync, unlinkSync } from 'fs'; +import { readFileSync, writeFileSync, unlinkSync, existsSync } from 'fs'; import { getSubtitleFormat, getOutputPath, markSrtAsSynced, isSyncedSrt } from './helpers'; +import { calculateSubtitleOffset } from './subtitleOffsetCalculator'; +import { basename, dirname, join } from 'path'; export class ProcessingEngine extends EventEmitter { private cancelledFiles: Set = new Set(); @@ -33,7 +35,7 @@ export class ProcessingEngine extends EventEmitter { // Ring buffer - remove oldest if at capacity if (this.logBuffer.length >= this.maxLogBufferSize) { - this.logBuffer.shift(); // Remove oldest + this.logBuffer.shift(); } this.logBuffer.push(message); @@ -61,6 +63,11 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] Processing with concurrency: ${this.maxConcurrent}`); this.log(`[${new Date().toISOString()}] Enabled engines: ${this.enabledEngines.join(', ')}`); + const retryConfig = getSyncRetryConfig(); + this.log( + `[${new Date().toISOString()}] Sync retry: threshold=${retryConfig.thresholdMs}ms, maxRetries=${retryConfig.maxRetries}`, + ); + for (let i = 0; i < srtFiles.length; i += this.maxConcurrent) { const batch = srtFiles.slice(i, i + this.maxConcurrent); this.log( @@ -72,8 +79,85 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] All files 
processed`); } + private async runEngine(engine: string, srtPath: string, videoPath: string): Promise<{ + success: boolean; + message: string; + stdout?: string; + stderr?: string; + skipped?: boolean; + duration: number; + }> { + const startTime = Date.now(); + try { + let result; + switch (engine) { + case 'ffsubsync': + result = await generateFfsubsyncSubtitles(srtPath, videoPath); + break; + case 'autosubsync': + result = await generateAutosubsyncSubtitles(srtPath, videoPath); + break; + case 'alass': + result = await generateAlassSubtitles(srtPath, videoPath); + break; + default: + return { success: false, message: `Unknown engine: ${engine}`, duration: 0 }; + } + const duration = Date.now() - startTime; + return { ...result, duration }; + } catch (error) { + const duration = Date.now() - startTime; + return { + success: false, + message: error instanceof Error ? error.message : String(error), + duration, + }; + } + } + + /** + * Create a temporary SRT file for retry attempts. + * Returns the path to the temp file, or null on failure. + */ + private createTempSrtFile(srtPath: string, content: string): string | null { + const directory = dirname(srtPath); + const srtBaseName = basename(srtPath, '.srt'); + const tempPath = join(directory, `${srtBaseName}.subsyncarr_retry.srt`); + try { + writeFileSync(tempPath, content, 'utf8'); + return tempPath; + } catch (error) { + this.log(`[${new Date().toISOString()}] Failed to create temp file: ${error instanceof Error ? error.message : String(error)}`); + return null; + } + } + + /** + * Clean up temporary retry files. 
+ */ + private cleanupTempFiles(tempSrtPath: string | null, engine: string): void { + if (tempSrtPath && existsSync(tempSrtPath)) { + try { + unlinkSync(tempSrtPath); + } catch { + // Ignore cleanup errors + } + } + // Also clean up the engine output for the temp file + if (tempSrtPath) { + const tempOutputPath = getOutputPath(tempSrtPath, engine); + if (existsSync(tempOutputPath)) { + try { + unlinkSync(tempOutputPath); + } catch { + // Ignore cleanup errors + } + } + } + } + private async processFile(srtPath: string): Promise { - const fileName = srtPath.split('/').pop(); + const fileName = srtPath.split('/').pop()!; // Skip already-synced files (overwrite mode) if (this.subtitleFormat === 'overwrite' && (await isSyncedSrt(srtPath))) { @@ -101,6 +185,16 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] Found video: ${videoPath.split('/').pop()}`); + // Read original subtitle content for offset calculation + let originalContent: string | null = null; + try { + originalContent = readFileSync(srtPath, 'utf8'); + } catch { + this.log(`[${new Date().toISOString()}] Warning: Could not read original subtitle for offset calculation`); + } + + const retryConfig = getSyncRetryConfig(); + // Process with each enabled engine let anyEngineSucceeded = false; let allEnginesSkipped = true; @@ -131,91 +225,190 @@ export class ProcessingEngine extends EventEmitter { this.log(`[${new Date().toISOString()}] Starting ${engine} for: ${fileName}`); this.emit('file:engine_started', { srtPath, engine }); - const startTime = Date.now(); - let result; + const result = await this.runEngine(engine, srtPath, videoPath); - try { - switch (engine) { - case 'ffsubsync': - result = await generateFfsubsyncSubtitles(srtPath, videoPath); - break; - case 'autosubsync': - result = await generateAutosubsyncSubtitles(srtPath, videoPath); - break; - case 'alass': - result = await generateAlassSubtitles(srtPath, videoPath); - break; - default: - continue; + // 
If this engine was skipped (already processed), log and continue + if (result.skipped) { + this.log(`[${new Date().toISOString()}] ⊘ ${engine} skipped (already processed): ${fileName}`); + this.emit('file:engine_completed', { + srtPath, + engine, + result: { ...result }, + }); + continue; // allEnginesSkipped stays true + } + + // An engine actually ran (not skipped), so not all are skipped + allEnginesSkipped = false; + + const status = result.success ? '✓' : '✗'; + this.log( + `[${new Date().toISOString()}] ${status} ${engine} completed (${(result.duration / 1000).toFixed(1)}s): ${fileName}`, + ); + if (!result.success) { + this.log(`[${new Date().toISOString()}] Error: ${result.message}`); + if (result.stderr) { + this.log(`[${new Date().toISOString()}] Stderr: ${result.stderr.substring(0, 500)}`); } + } - const duration = Date.now() - startTime; + if (result.success) { + const engineOutputPath = getOutputPath(srtPath, engine); + + // --- Offset check and retry logic --- + let finalContent: string | null = null; + let offsetMs: number | null = null; + let notFitting = false; + + if (originalContent && retryConfig.thresholdMs > 0) { + // Read the synced output + try { + const syncedContent = readFileSync(engineOutputPath, 'utf8'); + offsetMs = calculateSubtitleOffset(originalContent, syncedContent); + + this.log( + `[${new Date().toISOString()}] Offset: ${offsetMs}ms (threshold: ${retryConfig.thresholdMs}ms)`, + ); + + if (Math.abs(offsetMs) > retryConfig.thresholdMs) { + // Offset too large — retry with the synced subtitle as input + this.log( + `[${new Date().toISOString()}] ⚠ Offset exceeds threshold, retrying ${engine} (attempt 2/${retryConfig.maxRetries + 1})...`, + ); + this.emit('file:retry_needed', { srtPath, engine, offsetMs, attempt: 1 }); + + // Create temp file with the first-synced content for retry + const tempSrtPath = this.createTempSrtFile(srtPath, syncedContent); + if (tempSrtPath) { + try { + const retryResult = await this.runEngine(engine, 
tempSrtPath, videoPath); + + if (retryResult.success) { + const retryOutputPath = getOutputPath(tempSrtPath, engine); + try { + const retryContent = readFileSync(retryOutputPath, 'utf8'); + const retryOffsetMs = calculateSubtitleOffset(syncedContent, retryContent); + + this.log( + `[${new Date().toISOString()}] Retry offset: ${retryOffsetMs}ms (threshold: ${retryConfig.thresholdMs}ms)`, + ); + + if (Math.abs(retryOffsetMs) > retryConfig.thresholdMs) { + // Second attempt also off by too much — subtitle doesn't fit + this.log( + `[${new Date().toISOString()}] ✗ Retry also off by ${retryOffsetMs}ms — subtitle likely doesn't fit this media`, + ); + notFitting = true; + } else { + // Retry produced acceptable result — use it + this.log( + `[${new Date().toISOString()}] ✓ Retry acceptable (offset ${retryOffsetMs}ms) — using retry result`, + ); + finalContent = retryContent; + offsetMs = retryOffsetMs; + } + } catch { + this.log(`[${new Date().toISOString()}] Could not read retry output, using first sync result`); + finalContent = syncedContent; + } + } else { + this.log( + `[${new Date().toISOString()}] Retry failed: ${retryResult.message}`, + ); + // Use first sync result since retry failed + finalContent = syncedContent; + } + } finally { + this.cleanupTempFiles(tempSrtPath, engine); + } + } else { + // Could not create temp file — use first sync result + this.log(`[${new Date().toISOString()}] Could not create temp file for retry, using first sync result`); + finalContent = syncedContent; + } + } else { + // Offset within threshold — first sync is good + finalContent = syncedContent; + } + } catch { + this.log(`[${new Date().toISOString()}] Could not read synced output for offset calculation`); + // Continue without offset check — use the result as-is + } + } else { + // No original content or threshold disabled — skip offset check + try { + finalContent = readFileSync(engineOutputPath, 'utf8'); + } catch { + finalContent = null; + } + } - // If this engine was 
skipped (already processed), log and continue - if (result.skipped) { - this.log(`[${new Date().toISOString()}] ⊘ ${engine} skipped (already processed): ${fileName}`); + // Handle not_fitting case — subtitle doesn't match the media + if (notFitting) { + // Clean up the engine output file + if (existsSync(engineOutputPath)) { + try { + unlinkSync(engineOutputPath); + } catch { + // Ignore cleanup errors + } + } this.emit('file:engine_completed', { srtPath, engine, - result: { ...result, duration }, + result: { + success: false, + duration: result.duration, + message: `Subtitle doesn't fit media (offset: ${offsetMs ?? 'unknown'}ms)`, + offsetMs: offsetMs ?? undefined, + notFitting: true, + }, }); - continue; // allEnginesSkipped stays true + // Don't try other engines — subtitle doesn't fit regardless of engine + this.log(`[${new Date().toISOString()}] ✗ Subtitle doesn't fit media: ${fileName}`); + this.emit('file:not_fitting', { srtPath, engine, offsetMs: offsetMs ?? 0 }); + return; } - // An engine actually ran (not skipped), so not all are skipped - allEnginesSkipped = false; - - const status = result.success ? 
'✓' : '✗'; - this.log( - `[${new Date().toISOString()}] ${status} ${engine} completed (${(duration / 1000).toFixed(1)}s): ${fileName}`, - ); - if (!result.success) { - this.log(`[${new Date().toISOString()}] Error: ${result.message}`); - // Log stderr if available for debugging - if (result.stderr) { - this.log(`[${new Date().toISOString()}] Stderr: ${result.stderr.substring(0, 500)}`); - } - } + // Use the final content + anyEngineSucceeded = true; - if (result.success) { - anyEngineSucceeded = true; - - if (this.subtitleFormat === 'overwrite') { - const engineOutputPath = getOutputPath(srtPath, engine); - const engineContent = readFileSync(engineOutputPath, 'utf8'); - markSrtAsSynced(srtPath, engine, engineContent); - unlinkSync(engineOutputPath); - this.log(`[${new Date().toISOString()}] ✓ Synced (header-marked): ${fileName}`); - this.emit('file:engine_completed', { - srtPath, - engine, - result: { ...result, duration }, - }); - break; + if (this.subtitleFormat === 'overwrite') { + if (finalContent) { + markSrtAsSynced(srtPath, engine, finalContent); + } else { + // Fallback: read from engine output if we don't have finalContent + const content = readFileSync(engineOutputPath, 'utf8'); + markSrtAsSynced(srtPath, engine, content); } + // Clean up engine output file + if (existsSync(engineOutputPath)) { + try { + unlinkSync(engineOutputPath); + } catch { + // Ignore cleanup errors + } + } + this.log(`[${new Date().toISOString()}] ✓ Synced (header-marked): ${fileName}`); + this.emit('file:engine_completed', { + srtPath, + engine, + result: { ...result, offsetMs: offsetMs ?? undefined }, + }); + break; + } else { + // Standard mode — keep the output file as-is + this.emit('file:engine_completed', { + srtPath, + engine, + result: { ...result, offsetMs: offsetMs ?? 
undefined }, + }); } - - this.emit('file:engine_completed', { - srtPath, - engine, - result: { ...result, duration }, - }); - } catch (error) { - // Engine attempted to run (not skipped), so not all are skipped - allEnginesSkipped = false; - - const duration = Date.now() - startTime; - this.log(`[${new Date().toISOString()}] ✗ ${engine} failed (${(duration / 1000).toFixed(1)}s): ${fileName}`); - this.log(`[${new Date().toISOString()}] Error: ${error instanceof Error ? error.message : String(error)}`); - + } else { this.emit('file:engine_completed', { srtPath, engine, - result: { - success: false, - message: error instanceof Error ? error.message : String(error), - duration, - }, + result: { ...result }, }); } } @@ -246,4 +439,4 @@ export class ProcessingEngine extends EventEmitter { this.cancelledFiles.clear(); this.clearLogs(); } -} +} \ No newline at end of file diff --git a/src/stateManager.ts b/src/stateManager.ts index 91fc46c..676b2bd 100644 --- a/src/stateManager.ts +++ b/src/stateManager.ts @@ -88,7 +88,7 @@ export class StateManager extends EventEmitter { this.emit('run:cancelled', run); } - incrementRunCounter(runId: string, field: 'completed' | 'skipped' | 'failed'): void { + incrementRunCounter(runId: string, field: 'completed' | 'skipped' | 'failed' | 'not_fitting'): void { const run = this.db.getRun(runId)!; this.db.updateRun(runId, { [field]: run[field] + 1, @@ -131,6 +131,8 @@ export class StateManager extends EventEmitter { stdout?: string; stderr?: string; skipped?: boolean; + offsetMs?: number; + notFitting?: boolean; }, ): void { const files = this.db.getFileResults(runId); @@ -172,7 +174,7 @@ export class StateManager extends EventEmitter { const files = this.db.getFileResults(this.currentRunId); files.forEach((file) => { - if (['completed', 'skipped', 'error'].includes(file.status)) { + if (['completed', 'skipped', 'error', 'not_fitting'].includes(file.status)) { this.emit('file:cleared', file); } }); diff --git 
a/src/subtitleOffsetCalculator.ts b/src/subtitleOffsetCalculator.ts new file mode 100644 index 0000000..7abd880 --- /dev/null +++ b/src/subtitleOffsetCalculator.ts @@ -0,0 +1,97 @@ +/** + * Calculates the time offset between original and synced subtitle files. + * + * This is used to detect when a sync engine had to shift the subtitles by a + * large amount, which may indicate the subtitle doesn't match the media + * and a retry (or rejection) is warranted. + */ + +interface SrtEntry { + index: number; + startMs: number; + endMs: number; + text: string; +} + +/** + * Parse an SRT timestamp string (HH:MM:SS,mmm) into milliseconds. + */ +function parseSrtTimestamp(timestamp: string): number { + const match = timestamp.match(/(\d{2}):(\d{2}):(\d{2}),(\d{3})/); + if (!match) return 0; + const [, hours, minutes, seconds, ms] = match; + return parseInt(hours, 10) * 3600000 + parseInt(minutes, 10) * 60000 + parseInt(seconds, 10) * 1000 + parseInt(ms, 10); +} + +/** + * Parse SRT content into an array of entries with start/end times in ms. + * Skips malformed blocks silently. + */ +function parseSrtContent(content: string): SrtEntry[] { + // Strip any sync marker header lines (e.g.
"# synced:ffsubsync 1234567890") + const lines = content.split('\n'); + const filteredLines = lines.filter((line) => !line.startsWith('# synced:')); + const cleanContent = filteredLines.join('\n'); + + const blocks = cleanContent.trim().split(/\n\s*\n/); + const entries: SrtEntry[] = []; + + for (const block of blocks) { + const blockLines = block.trim().split('\n'); + if (blockLines.length < 2) continue; + + const index = parseInt(blockLines[0], 10); + if (isNaN(index)) continue; + + const timeMatch = blockLines[1].match( + /(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})/, + ); + if (!timeMatch) continue; + + const startMs = parseSrtTimestamp(timeMatch[1]); + const endMs = parseSrtTimestamp(timeMatch[2]); + const text = blockLines.slice(2).join('\n'); + + entries.push({ index, startMs, endMs, text }); + } + + return entries; +} + +/** + * Calculate the median of an array of numbers. + */ +function median(values: number[]): number { + if (values.length === 0) return 0; + const sorted = [...values].sort((a, b) => a - b); + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 !== 0 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; +} + +/** + * Calculate the time offset (in milliseconds) between original and synced SRT content. + * + * Matches entries by index and computes the median shift in start times. + * A positive offset means the synced subtitles are later than the original; + * a negative offset means they are earlier. + * + * Returns 0 if either content is empty or cannot be parsed. 
+ */ +export function calculateSubtitleOffset(originalContent: string, syncedContent: string): number { + const original = parseSrtContent(originalContent); + const synced = parseSrtContent(syncedContent); + + if (original.length === 0 || synced.length === 0) return 0; + + const offsets: number[] = []; + const minLen = Math.min(original.length, synced.length); + + for (let i = 0; i < minLen; i++) { + // Include the pair unless both start times are 0 ms + if (original[i].startMs > 0 || synced[i].startMs > 0) { + offsets.push(synced[i].startMs - original[i].startMs); + } + } + + return median(offsets); +} \ No newline at end of file From 05cadb063cd6b321edf1ca94423a693d7abe07fb Mon Sep 17 00:00:00 2001 From: David Stegmueller Date: Wed, 13 May 2026 16:49:06 +0200 Subject: [PATCH 16/16] feat: delete subtitle file when it doesn't fit the media --- src/processingEngine.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/processingEngine.ts b/src/processingEngine.ts index ce2b64a..a68291d 100644 --- a/src/processingEngine.ts +++ b/src/processingEngine.ts @@ -353,6 +353,15 @@ export class ProcessingEngine extends EventEmitter { // Ignore cleanup errors } } + // Delete the original subtitle file — it doesn't fit the media + if (existsSync(srtPath)) { + try { + unlinkSync(srtPath); + this.log(`[${new Date().toISOString()}] Deleted subtitle that doesn't fit media: ${fileName}`); + } catch { + this.log(`[${new Date().toISOString()}] Failed to delete subtitle: ${fileName}`); + } + } this.emit('file:engine_completed', { srtPath, engine,