From 65d6cefc3809d49e0f8879aec24505a89ab86561 Mon Sep 17 00:00:00 2001 From: HONGDAE KIM Date: Sun, 15 Feb 2026 14:33:49 +0900 Subject: [PATCH] Add visualization recommendation flow with API/UI fallback --- bitnet_tools/ui/app.js | 48 +++++++++++++++- bitnet_tools/ui/index.html | 4 ++ bitnet_tools/visualize.py | 97 +++++++++++++++++++++------------ bitnet_tools/viz_recommender.py | 74 +++++++++++++++++++++++++ bitnet_tools/web.py | 89 +++++++++++++++++++++++++----- tests/test_viz_recommender.py | 30 ++++++++++ tests/test_web.py | 15 +++++ 7 files changed, 306 insertions(+), 51 deletions(-) create mode 100644 bitnet_tools/viz_recommender.py create mode 100644 tests/test_viz_recommender.py diff --git a/bitnet_tools/ui/app.js b/bitnet_tools/ui/app.js index 15c3db9..01add18 100644 --- a/bitnet_tools/ui/app.js +++ b/bitnet_tools/ui/app.js @@ -5,6 +5,8 @@ const UI = { refreshSheetsBtn: document.getElementById('refreshSheetsBtn'), csvText: document.getElementById('csvText'), question: document.getElementById('question'), + showVizOptionsBtn: document.getElementById('showVizOptionsBtn'), + vizRecommendation: document.getElementById('vizRecommendation'), intent: document.getElementById('intent'), intentActions: document.getElementById('intentActions'), model: document.getElementById('model'), @@ -85,6 +87,7 @@ const appState = { uploadedFile: null, detectedInputType: 'csv', candidateTables: [], + vizRecommendation: null, }; const CONFIDENCE_THRESHOLD_DEFAULT = 0.7; @@ -676,6 +679,32 @@ function setPreprocessStatusText(text) { if (UI.preprocessStatus) UI.preprocessStatus.textContent = text; } +function renderVizRecommendation(rec) { + appState.vizRecommendation = rec || null; + if (!UI.vizRecommendation) return; + if (!rec) { + UI.vizRecommendation.textContent = '추천 시각화 옵션이 여기에 표시됩니다.'; + return; + } + const charts = Array.isArray(rec.recommended_chart_types) ? rec.recommended_chart_types.join(', ') : '-'; + UI.vizRecommendation.textContent = `의도: ${rec.intent || '-'}\n추천 차트: ${charts}\n추천 사유: ${rec.reason || '-'}`; +} + +async function fetchVizRecommendation() { + clearError(); + const question = (UI.question?.value || '').trim(); + try { + const rec = await postJson('/api/viz/recommend', { question }, '시각화 추천'); + renderVizRecommendation(rec); + setStatus('시각화 추천을 확인했습니다.'); + return rec; + } catch (err) { + showError(err.userMessage || '시각화 추천 실패', err.detail || ''); + setStatus('시각화 추천 실패'); + return null; + } +} + function stopPreprocessPolling() { if (appState.preprocessJob.pollTimer) { clearInterval(appState.preprocessJob.pollTimer); @@ -771,7 +800,13 @@ async function pollChartJobOnce() { if (result.status === 'done') { stopChartPolling(); UI.retryChartsJobBtn.disabled = true; - setStatus('차트 작업 완료'); + if (result.fallback) { + const fallbackText = JSON.stringify(result.fallback, null, 2); + setChartsJobStatusText(`job=${result.job_id} status=done (fallback)\nreason=${result.fallback_reason || 'chart_generation_failed'}\n${fallbackText}`); + setStatus('차트 생성 실패로 표/요약 fallback을 제공합니다.'); + } else { + setStatus('차트 작업 완료'); + } } else if (result.status === 'failed') { stopChartPolling(); UI.retryChartsJobBtn.disabled = false; @@ -812,7 +847,9 @@ async function startChartsJob() { const payloadFiles = await buildMultiPayloadFiles(files); appState.chartJob.files = payloadFiles; - const queued = await postJson('/api/charts/jobs', { files: payloadFiles }, '차트 작업 생성'); + const rec = appState.vizRecommendation || await fetchVizRecommendation(); + const selectedChartTypes = Array.isArray(rec?.recommended_chart_types) ? rec.recommended_chart_types : []; + const queued = await postJson('/api/charts/jobs', { files: payloadFiles, question: UI.question?.value || '', selected_chart_types: selectedChartTypes }, '차트 작업 생성'); appState.chartJob.id = queued.job_id; appState.chartJob.status = queued.status; UI.retryChartsJobBtn.disabled = true; @@ -836,7 +873,10 @@ async function retryChartsJob() { clearError(); toggleBusy(true); try { - const queued = await postJson('/api/charts/jobs', { files: appState.chartJob.files }, '차트 작업 재시도'); + const selectedChartTypes = Array.isArray(appState.vizRecommendation?.recommended_chart_types) + ? appState.vizRecommendation.recommended_chart_types + : []; + const queued = await postJson('/api/charts/jobs', { files: appState.chartJob.files, question: UI.question?.value || '', selected_chart_types: selectedChartTypes }, '차트 작업 재시도'); appState.chartJob.id = queued.job_id; appState.chartJob.status = queued.status; UI.retryChartsJobBtn.disabled = true; @@ -1120,6 +1160,7 @@ function bindEvents() { UI.analyzeBtn?.addEventListener('click', runByIntent); UI.quickAnalyzeBtn?.addEventListener('click', runByIntent); + UI.showVizOptionsBtn?.addEventListener('click', fetchVizRecommendation); UI.runBtn?.addEventListener('click', runModel); UI.multiAnalyzeBtn?.addEventListener('click', runMultiAnalyze); UI.startChartsJobBtn?.addEventListener('click', startChartsJob); @@ -1195,6 +1236,7 @@ function init() { if (UI.retryPreprocessBtn) UI.retryPreprocessBtn.disabled = true; setChartsJobStatusText('차트 작업 대기 중'); setPreprocessStatusText('입력 전처리 대기 중'); + renderVizRecommendation(null); } init(); diff --git a/bitnet_tools/ui/index.html b/bitnet_tools/ui/index.html index 230b363..bd09666 100644 --- a/bitnet_tools/ui/index.html +++ b/bitnet_tools/ui/index.html @@ -49,6 +49,10 @@

2) 입력

+
+ +
+
추천 시각화 옵션이 여기에 표시됩니다.
diff --git a/bitnet_tools/visualize.py b/bitnet_tools/visualize.py index 9da48d7..1587076 100644 --- a/bitnet_tools/visualize.py +++ b/bitnet_tools/visualize.py @@ -7,6 +7,9 @@ from typing import Any +SUPPORTED_CHART_TYPES = {"histogram", "boxplot", "missing", "bar", "scatter", "line"} + + SAMPLE_CAP = 20000 TOP_K = 10 @@ -103,6 +106,7 @@ def create_file_charts( out_dir: Path, max_numeric: int = 3, max_categorical: int = 2, + selected_chart_types: list[str] | None = None, ) -> list[str]: plt = _ensure_matplotlib() @@ -116,6 +120,9 @@ def create_file_charts( artifacts: list[str] = [] stem = _safe_stem(csv_path) + selected = {c.strip().lower() for c in (selected_chart_types or []) if c} + if not selected: + selected = set(SUPPORTED_CHART_TYPES) for col in numeric_cols: values: list[float] = profiles[col]["values"] @@ -123,29 +130,31 @@ def create_file_charts( if not values: continue - fig = plt.figure(figsize=(7, 4)) - plt.hist(values, bins=20) - plt.title(f"{stem} - {col} histogram(sample)") - plt.xlabel(col) - plt.ylabel("count") - plt.tight_layout() - out = out_dir / f"{stem}_{col}_hist.png" - fig.savefig(out) - plt.close(fig) - artifacts.append(str(out)) - - fig = plt.figure(figsize=(5, 4)) - plt.boxplot(values, vert=True) - plt.title(f"{stem} - {col} boxplot(sample)") - plt.ylabel(col) - plt.tight_layout() - out = out_dir / f"{stem}_{col}_box.png" - fig.savefig(out) - plt.close(fig) - artifacts.append(str(out)) + if "histogram" in selected: + fig = plt.figure(figsize=(7, 4)) + plt.hist(values, bins=20) + plt.title(f"{stem} - {col} histogram(sample)") + plt.xlabel(col) + plt.ylabel("count") + plt.tight_layout() + out = out_dir / f"{stem}_{col}_hist.png" + fig.savefig(out) + plt.close(fig) + artifacts.append(str(out)) + + if "boxplot" in selected: + fig = plt.figure(figsize=(5, 4)) + plt.boxplot(values, vert=True) + plt.title(f"{stem} - {col} boxplot(sample)") + plt.ylabel(col) + plt.tight_layout() + out = out_dir / f"{stem}_{col}_box.png" + fig.savefig(out) + plt.close(fig) + artifacts.append(str(out)) total = profiles[col]["seen"] + missing - if total > 0: + if total > 0 and "missing" in selected: fig = plt.figure(figsize=(5, 3)) plt.bar(["non_missing", "missing"], [profiles[col]["seen"], missing]) plt.title(f"{stem} - {col} missing overview") @@ -162,17 +171,18 @@ def create_file_charts( labels = [x[0] for x in items] counts = [x[1] for x in items] - fig = plt.figure(figsize=(8, 4)) - plt.bar(range(len(labels)), counts) - plt.xticks(range(len(labels)), labels, rotation=30, ha="right") - plt.title(f"{stem} - {col} top values") - plt.tight_layout() - out = out_dir / f"{stem}_{col}_top.png" - fig.savefig(out) - plt.close(fig) - artifacts.append(str(out)) - - if len(numeric_cols) >= 2: + if "bar" in selected: + fig = plt.figure(figsize=(8, 4)) + plt.bar(range(len(labels)), counts) + plt.xticks(range(len(labels)), labels, rotation=30, ha="right") + plt.title(f"{stem} - {col} top values") + plt.tight_layout() + out = out_dir / f"{stem}_{col}_top.png" + fig.savefig(out) + plt.close(fig) + artifacts.append(str(out)) + + if len(numeric_cols) >= 2 and ("scatter" in selected or "line" in selected): x_col, y_col = numeric_cols[0], numeric_cols[1] xs: list[float] = [] ys: list[float] = [] @@ -192,7 +202,7 @@ def create_file_charts( seen += 1 _reservoir_pair(xs, ys, x, y, seen, SAMPLE_CAP) - if xs and ys: + if xs and ys and "scatter" in selected: fig = plt.figure(figsize=(6, 5)) plt.scatter(xs, ys, alpha=0.6, s=12) plt.title(f"{stem} - {x_col} vs {y_col} scatter(sample)") @@ -204,11 +214,28 @@ def create_file_charts( plt.close(fig) artifacts.append(str(out)) + if xs and ys and "line" in selected: + pairs = sorted(zip(xs, ys), key=lambda pair: pair[0]) + fig = plt.figure(figsize=(6, 4)) + plt.plot([p[0] for p in pairs], [p[1] for p in pairs], linewidth=1.3) + plt.title(f"{stem} - {x_col} vs {y_col} line(sample)") + plt.xlabel(x_col) + plt.ylabel(y_col) + plt.tight_layout() + out = out_dir / f"{stem}_{x_col}_{y_col}_line.png" + fig.savefig(out) + plt.close(fig) + artifacts.append(str(out)) + return artifacts -def create_multi_charts(csv_paths: list[Path], out_dir: Path) -> dict[str, Any]: +def create_multi_charts( + csv_paths: list[Path], + out_dir: Path, + selected_chart_types: list[str] | None = None, +) -> dict[str, Any]: results: dict[str, Any] = {} for p in csv_paths: - results[str(p)] = create_file_charts(p, out_dir) + results[str(p)] = create_file_charts(p, out_dir, selected_chart_types=selected_chart_types) return results diff --git a/bitnet_tools/viz_recommender.py b/bitnet_tools/viz_recommender.py new file mode 100644 index 0000000..34bcf7a --- /dev/null +++ b/bitnet_tools/viz_recommender.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class VizRecommendation: + intent: str + chart_types: list[str] + reason: str + + +_INTENT_RULES: list[tuple[tuple[str, ...], VizRecommendation]] = [ + ( + ("추이", "트렌드", "변화", "시계열", "trend", "over time"), + VizRecommendation( + intent="trend", + chart_types=["line", "scatter"], + reason="시간/순서 기반 변화 파악에는 선형 추세와 분포 확인이 유리합니다.", + ), + ), + ( + ("비교", "랭킹", "상위", "하위", "compare", "ranking"), + VizRecommendation( + intent="comparison", + chart_types=["bar", "boxplot"], + reason="그룹 간 크기 비교에는 막대, 분산 비교에는 박스플롯이 적합합니다.", + ), + ), + ( + ("관계", "상관", "영향", "relationship", "correlation"), + VizRecommendation( + intent="relationship", + chart_types=["scatter", "histogram"], + reason="변수 간 관계는 산점도로, 단일 변수 분포는 히스토그램으로 확인합니다.", + ), + ), + ( + ("비율", "구성", "점유", "composition", "ratio"), + VizRecommendation( + intent="composition", + chart_types=["bar"], + reason="구성 비교는 범주형 막대 차트로 읽기 쉽고 왜곡이 적습니다.", + ), + ), + ( + ("결측", "누락", "품질", "이상치", "missing", "quality", "outlier"), + VizRecommendation( + intent="quality", + chart_types=["missing", "boxplot"], + reason="데이터 품질 확인에는 결측 막대와 이상치 확인용 박스플롯이 효과적입니다.", + ), + ), +] + +_DEFAULT = VizRecommendation( + intent="overview", + chart_types=["histogram", "bar", "scatter"], + reason="일반 탐색 질문으로 판단되어 분포/범주/관계를 함께 확인하는 구성을 추천합니다.", +) + + +def recommend_chart_types(question: str) -> dict[str, object]: + text = (question or "").strip().lower() + if not text: + rec = _DEFAULT + else: + rec = next((rule for keywords, rule in _INTENT_RULES if any(k in text for k in keywords)), _DEFAULT) + + return { + "intent": rec.intent, + "recommended_chart_types": rec.chart_types, + "reason": rec.reason, + } diff --git a/bitnet_tools/web.py b/bitnet_tools/web.py index 0bf1abf..dbbd06e 100644 --- a/bitnet_tools/web.py +++ b/bitnet_tools/web.py @@ -27,6 +27,7 @@ from .multi_csv import analyze_multiple_csv from .planner import build_plan, execute_plan_from_csv_text, parse_question_to_intent from .visualize import create_multi_charts +from .viz_recommender import recommend_chart_types UI_DIR = Path(__file__).parent / "ui" @@ -397,7 +398,35 @@ def get_preprocess_job(job_id: str) -> dict[str, Any]: } return {'job_id': job_id, 'status': status} -def _run_chart_job(job_id: str, files: list[dict[str, str]]) -> dict[str, Any]: + + +def _build_chart_fallback(csv_paths: list[Path]) -> dict[str, Any]: + fallback: dict[str, Any] = {} + for csv_path in csv_paths: + try: + with csv_path.open("r", encoding="utf-8-sig", newline="") as f: + reader = csv.DictReader(f) + headers = [str(h) for h in (reader.fieldnames or [])] + rows = [] + for idx, row in enumerate(reader): + if idx >= 5: + break + rows.append({k: v for k, v in row.items()}) + fallback[str(csv_path)] = { + 'headers': headers, + 'preview_rows': rows, + 'summary': f'chart generation failed; returned {len(rows)} preview rows', + } + except Exception as exc: + fallback[str(csv_path)] = { + 'headers': [], + 'preview_rows': [], + 'summary': f'chart generation failed and fallback preview failed: {exc}', + } + return fallback + + +def _run_chart_job(job_id: str, files: list[dict[str, str]], selected_chart_types: list[str] | None = None) -> dict[str, Any]: CHART_JOB_DIR.mkdir(parents=True, exist_ok=True) job_input_dir = CHART_JOB_DIR / f"{job_id}_input" out_dir = CHART_JOB_DIR / f"{job_id}_charts" @@ -416,21 +445,45 @@ def _run_chart_job(job_id: str, files: list[dict[str, str]]) -> dict[str, Any]: if not csv_paths: raise ValueError('valid csv_text files are required') - charts = create_multi_charts(csv_paths, out_dir) - return { - 'job_id': job_id, - 'status': 'done', - 'chart_count': sum(len(v) for v in charts.values()), - 'charts': charts, - 'output_dir': str(out_dir), - } + try: + try: + charts = create_multi_charts(csv_paths, out_dir, selected_chart_types=selected_chart_types) + except TypeError: + charts = create_multi_charts(csv_paths, out_dir) + chart_count = sum(len(v) for v in charts.values()) + if chart_count == 0: + return { + 'job_id': job_id, + 'status': 'done', + 'chart_count': 0, + 'charts': charts, + 'output_dir': str(out_dir), + 'fallback': _build_chart_fallback(csv_paths), + } + return { + 'job_id': job_id, + 'status': 'done', + 'chart_count': chart_count, + 'charts': charts, + 'output_dir': str(out_dir), + } + except Exception as exc: + return { + 'job_id': job_id, + 'status': 'done', + 'chart_count': 0, + 'charts': {}, + 'output_dir': str(out_dir), + 'fallback': _build_chart_fallback(csv_paths), + 'fallback_reason': str(exc), + } -def submit_chart_job(files: list[dict[str, str]]) -> str: +def submit_chart_job(files: list[dict[str, str]], selected_chart_types: list[str] | None = None) -> str: if not isinstance(files, list) or not files: raise ValueError('files is required') job_id = uuid.uuid4().hex - future = _CHART_EXECUTOR.submit(_run_chart_job, job_id, files) + future = _CHART_EXECUTOR.submit(_run_chart_job, job_id, files, selected_chart_types) with _CHART_LOCK: _CHART_JOBS[job_id] = future return job_id @@ -722,10 +775,20 @@ def do_POST(self) -> None: ) return self._send_json(result) + if route == '/api/viz/recommend': + question = str(payload.get('question', '') or '').strip() + return self._send_json(recommend_chart_types(question)) + if route == "/api/charts/jobs": files = payload.get('files', []) - job_id = submit_chart_job(files) - return self._send_json({'job_id': job_id, 'status': 'queued'}, HTTPStatus.ACCEPTED) + selected_chart_types = payload.get('selected_chart_types', None) + if selected_chart_types is not None and not isinstance(selected_chart_types, list): + return self._send_json(self._error_payload('selected_chart_types must be a list'), HTTPStatus.BAD_REQUEST) + if not selected_chart_types: + question = str(payload.get('question', '') or '').strip() + selected_chart_types = recommend_chart_types(question).get('recommended_chart_types', []) + job_id = submit_chart_job(files, selected_chart_types=selected_chart_types) + return self._send_json({'job_id': job_id, 'status': 'queued', 'selected_chart_types': selected_chart_types}, HTTPStatus.ACCEPTED) if route == "/api/run": model = str(payload.get("model", "")).strip() diff --git a/tests/test_viz_recommender.py b/tests/test_viz_recommender.py new file mode 100644 index 0000000..6ca4525 --- /dev/null +++ b/tests/test_viz_recommender.py @@ -0,0 +1,30 @@ +from bitnet_tools.viz_recommender import recommend_chart_types + + +def test_recommend_trend_question_consistency(): + q = '월별 매출 추이를 보여줘' + first = recommend_chart_types(q) + second = recommend_chart_types(q) + assert first == second + assert first['intent'] == 'trend' + assert 'line' in first['recommended_chart_types'] + + +def test_recommend_relationship_question_consistency(): + q = '광고비와 매출의 상관 관계를 알고 싶어' + result = recommend_chart_types(q) + assert result['intent'] == 'relationship' + assert result['recommended_chart_types'][:2] == ['scatter', 'histogram'] + + +def test_recommend_quality_question_consistency(): + q = '결측치와 이상치가 있는지 확인해줘' + result = recommend_chart_types(q) + assert result['intent'] == 'quality' + assert result['recommended_chart_types'] == ['missing', 'boxplot'] + + +def test_recommend_default_for_unknown_question(): + result = recommend_chart_types('데이터를 한번 살펴봐') + assert result['intent'] == 'overview' + assert result['recommended_chart_types'] == ['histogram', 'bar', 'scatter'] diff --git a/tests/test_web.py b/tests/test_web.py index dc8a9ca..8307d47 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -343,3 +343,18 @@ def test_geo_suspects_api_validates_lat_lon_columns(): finally: server.shutdown() thread.join(timeout=1) + +def test_viz_recommend_api_returns_reason_and_chart_types(): + server, thread = _run_server() + base = f'http://127.0.0.1:{server.server_port}' + try: + code, body = _post_json(base + '/api/viz/recommend', { + 'question': '월별 변화 추이를 보고 싶어', + }) + assert code == 200 + assert body['intent'] == 'trend' + assert 'line' in body['recommended_chart_types'] + assert body['reason'] + finally: + server.shutdown() + thread.join(timeout=2)