From 19e09d564b4eaeba04d5887e20f288669ce9a7a2 Mon Sep 17 00:00:00 2001 From: Seasawher <47292598+Seasawher@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:07:38 +0900 Subject: [PATCH 1/4] =?UTF-8?q?SEO=E3=81=AB=E7=9D=80=E6=89=8B=E3=81=99?= =?UTF-8?q?=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .devcontainer/Dockerfile | 11 ++ .github/CONTRIBUTING.md | 1 + .github/workflows/deploy.yml | 5 + lakefile.lean | 1 + scripts/updateSeoMetadata.mjs | 318 ++++++++++++++++++++++++++++++++++ theme/index.hbs | 15 ++ 6 files changed, 351 insertions(+) create mode 100644 scripts/updateSeoMetadata.mjs diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index de54af52..38e188f5 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,5 +1,14 @@ FROM mcr.microsoft.com/devcontainers/base:jammy +# SEO メタ情報の補正スクリプトを実行するために Node.js をインストール +RUN apt-get update && \ + apt-get install -y --no-install-recommends ca-certificates curl gnupg && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /usr/share/keyrings/nodesource.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends nodejs && \ + rm -rf /var/lib/apt/lists/* + # ユーザの追加 USER vscode WORKDIR /home/vscode @@ -21,3 +30,5 @@ ENV PATH="${HOME}/.cargo/bin:${PATH}" # mdbook と mdbook のプラグインのインストール RUN cargo install --version 0.4.48 mdbook && \ cargo install --version 1.18.0 mdbook-admonish + +CMD ["lake", "run", "build"] diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 8d5439ed..08ae188e 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,6 +6,7 @@ * [mdbook](https://github.com/rust-lang/mdBook) を使用して markdown ファイルから HTML を生成しています。以下のプラグインを使用しています。 * 
[mdbook-admonish](https://github.com/tommilligan/mdbook-admonish) を使用してカードを表示させています。 +* [Node.js](https://nodejs.org/) を生成された HTML の SEO メタ情報を補正するスクリプトの実行に使用しています。 > [!IMPORTANT] > 開発に使用する mdbook のバージョンは `0.4.48` に固定してください。 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 59a17370..67f6f793 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -29,6 +29,11 @@ jobs: curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y --default-toolchain none echo "$HOME/.elan/bin" >> $GITHUB_PATH + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: '22' + - name: Setup mdBook uses: peaceiris/actions-mdbook@v2 with: diff --git a/lakefile.lean b/lakefile.lean index 5e1fcc4c..82b00bde 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -49,6 +49,7 @@ script build do runCmd "lake exe mdgen LeanByExample booksrc --count --exercise" runCmd "lake exe mdgen Exe booksrc" runCmd "mdbook build" + runCmd "node scripts/updateSeoMetadata.mjs" return 0 end BuildScript diff --git a/scripts/updateSeoMetadata.mjs b/scripts/updateSeoMetadata.mjs new file mode 100644 index 00000000..7588e639 --- /dev/null +++ b/scripts/updateSeoMetadata.mjs @@ -0,0 +1,318 @@ +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const scriptDir = path.dirname(fileURLToPath(import.meta.url)); +const projectRoot = path.resolve(scriptDir, ".."); +const bookDir = path.join(projectRoot, "book"); +const configPath = path.join(projectRoot, "book.toml"); + +const configText = fs.readFileSync(configPath, "utf8"); +const siteRoot = ensureTrailingSlash(readTomlString(configText, "site-url")); +const bookTitle = readTomlString(configText, "title") ?? "Lean by Example"; +const defaultDescription = readTomlString(configText, "description") ?? 
""; +const defaultImageUrl = new URL("image/project_image.png", siteRoot).toString(); + +if (!fs.existsSync(bookDir)) { + throw new Error(`Build directory does not exist: ${bookDir}`); +} + +let updatedCount = 0; +let indexedCount = 0; +let skippedCount = 0; + +for (const filePath of findHtmlFiles(bookDir)) { + const relativePath = toPosixPath(path.relative(bookDir, filePath)); + const originalHtml = fs.readFileSync(filePath, "utf8"); + let html = originalHtml; + + if (relativePath === "404.html") { + const description = extractPageDescription(html) || "お探しのページが見つかりませんでした。"; + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + '', + ); + html = removeIndexableSeoTags(html); + skippedCount++; + } else if (hasNoindex(html)) { + skippedCount++; + } else { + const title = extractTitle(html) ?? bookTitle; + const description = extractPageDescription(html) || defaultDescription; + const canonicalUrl = canonicalUrlFor(relativePath); + const ogType = relativePath === "index.html" ? 
"website" : "article"; + + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + '', + ); + html = upsertTag( + html, + /]*>/i, + '', + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + html = upsertTag( + html, + /]*>/i, + ``, + ); + + indexedCount++; + } + + if (html !== originalHtml) { + fs.writeFileSync(filePath, html); + updatedCount++; + } +} + +console.log( + `Updated SEO metadata in ${updatedCount} HTML files (${indexedCount} indexable pages, ${skippedCount} noindex pages).`, +); + +function readTomlString(text, key) { + const match = text.match(new RegExp(`^${escapeRegExp(key)}\\s*=\\s*"((?:\\\\.|[^"])*)"`, "m")); + if (!match) { + return null; + } + + return match[1].replace(/\\([btnfr"\\])/g, (_, char) => { + switch (char) { + case "b": + return "\b"; + case "t": + return "\t"; + case "n": + return "\n"; + case "f": + return "\f"; + case "r": + return "\r"; + case '"': + return '"'; + case "\\": + return "\\"; + default: + return char; + } + }); +} + +function ensureTrailingSlash(url) { + if (!url) { + throw new Error('Missing "site-url" in book.toml'); + } + + return url.endsWith("/") ? 
url : `${url}/`; +} + +function findHtmlFiles(dir) { + const entries = fs.readdirSync(dir, { withFileTypes: true }); + const files = []; + + for (const entry of entries) { + const entryPath = path.join(dir, entry.name); + if (entry.isDirectory()) { + files.push(...findHtmlFiles(entryPath)); + } else if (entry.isFile() && entry.name.endsWith(".html")) { + files.push(entryPath); + } + } + + return files; +} + +function toPosixPath(filePath) { + return filePath.split(path.sep).join("/"); +} + +function canonicalUrlFor(relativePath) { + let urlPath = relativePath; + + if (urlPath === "index.html") { + urlPath = ""; + } else if (urlPath.endsWith("/index.html")) { + urlPath = urlPath.slice(0, -"index.html".length); + } + + return new URL(encodeURI(urlPath), siteRoot).toString(); +} + +function hasNoindex(html) { + return /]*>/i.test(html); +} + +function extractTitle(html) { + const match = html.match(/([\s\S]*?)<\/title>/i); + if (!match) { + return null; + } + + return normalizeText(decodeHtmlEntities(match[1])); +} + +function extractPageDescription(html) { + const mainHtml = extractMainHtml(html); + const paragraphMatches = mainHtml.matchAll(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/gi); + + for (const match of paragraphMatches) { + const text = normalizeText(toPlainText(match[1])); + if (text.length >= 10) { + return truncateDescription(text); + } + } + + return ""; +} + +function extractMainHtml(html) { + const startMarker = '<div class="content-wrap">'; + const endMarker = '<div class="sidetoc">'; + const start = html.indexOf(startMarker); + + if (start === -1) { + return html; + } + + const end = html.indexOf(endMarker, start); + if (end === -1) { + return html.slice(start + startMarker.length); + } + + return html.slice(start + startMarker.length, end); +} + +function toPlainText(fragment) { + return decodeHtmlEntities( + fragment + .replace(/<script\b[\s\S]*?<\/script>/gi, " ") + .replace(/<style\b[\s\S]*?<\/style>/gi, " ") + .replace(/<br\s*\/?>/gi, " ") + 
.replace(/<[^>]+>/g, " "), + ); +} + +function decodeHtmlEntities(text) { + const namedEntities = { + amp: "&", + apos: "'", + gt: ">", + lt: "<", + nbsp: " ", + quot: '"', + }; + + return text.replace(/&(#x[0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]+);/g, (match, entity) => { + if (entity.startsWith("#x")) { + return String.fromCodePoint(Number.parseInt(entity.slice(2), 16)); + } + if (entity.startsWith("#")) { + return String.fromCodePoint(Number.parseInt(entity.slice(1), 10)); + } + + return namedEntities[entity] ?? match; + }); +} + +function normalizeText(text) { + return text + .replace(/\s+/g, " ") + .replace(/\s+([、。!?])/g, "$1") + .replace(/([、。!?])\s+/g, "$1") + .trim(); +} + +function truncateDescription(text) { + const maxLength = 160; + const chars = Array.from(text); + + if (chars.length <= maxLength) { + return text; + } + + return `${chars.slice(0, maxLength - 3).join("").trimEnd()}...`; +} + +function upsertTag(html, pattern, tag) { + if (pattern.test(html)) { + return html.replace(pattern, tag); + } + + return html.replace(/<\/head>/i, ` ${tag}\n </head>`); +} + +function removeIndexableSeoTags(html) { + return html + .replace(/\n?\s*<link\s+rel=["']canonical["'][^>]*>/gi, "") + .replace(/\n?\s*<meta\s+property=["']og:[^"']+["'][^>]*>/gi, "") + .replace(/\n?\s*<meta\s+name=["']twitter:[^"']+["'][^>]*>/gi, ""); +} + +function escapeHtml(value) { + return value + .replace(/&/g, "&") + .replace(/"/g, """) + .replace(/</g, "<") + .replace(/>/g, ">"); +} + +function escapeRegExp(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} diff --git a/theme/index.hbs b/theme/index.hbs index 3af13d90..eb7a65cf 100644 --- a/theme/index.hbs +++ b/theme/index.hbs @@ -16,6 +16,21 @@ {{> head}} <meta name="description" content="{{ description }}"> + {{#if is_print}} + {{else}} + <link rel="canonical" href="{{ path_to_root }}"> + <meta property="og:site_name" content="{{ book_title }}"> + <meta property="og:title" content="{{ title }}"> + <meta 
property="og:description" content="{{ description }}"> + <meta property="og:type" content="article"> + <meta property="og:url" content="{{ path_to_root }}"> + <meta property="og:image" content="{{ path_to_root }}image/project_image.png"> + <meta property="og:locale" content="ja_JP"> + <meta name="twitter:card" content="summary_large_image"> + <meta name="twitter:title" content="{{ title }}"> + <meta name="twitter:description" content="{{ description }}"> + <meta name="twitter:image" content="{{ path_to_root }}image/project_image.png"> + {{/if}} <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="theme-color" content="#ffffff"> From 3868bd335104d0a9c103844ffc42b7243f2d84f2 Mon Sep 17 00:00:00 2001 From: Seasawher <47292598+Seasawher@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:12:20 +0900 Subject: [PATCH 2/4] =?UTF-8?q?lychee=20=E3=82=92=E9=BB=99=E3=82=89?= =?UTF-8?q?=E3=81=9B=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .lycheeignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.lycheeignore b/.lycheeignore index f5ceb3e4..77919f88 100644 --- a/.lycheeignore +++ b/.lycheeignore @@ -1,5 +1,7 @@ https://adam.math.hhu.de/#/g/leanprover-community/NNG4 https://dcbadge.limes.pink/api/server/* +https://dl.acm.org/doi/10.1145/* +https://dl.acm.org/doi/pdf/10.1145/* https://lean-forward.github.io/ https://lean-lang.org/* https://leansearch.net/ @@ -7,6 +9,5 @@ https://live.lean-lang.org/ https://raw.githubusercontent.com/lean-ja/lean-by-example/.* https://recharts.org/en-US https://reservoir.lean-lang.org/ -https://dl.acm.org/doi/pdf/10.1145/* -https://dl.acm.org/doi/10.1145/* https://www.cs.cmu.edu/* +https://www.cs.ru.nl/* From c1bf935c3a652f51b08256c85ed5deb45ff39ce8 Mon Sep 17 00:00:00 2001 From: Seasawher <47292598+Seasawher@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:15:03 +0900 Subject: [PATCH 3/4] 
=?UTF-8?q?lake=20run=20build=20=E3=82=92=E5=AE=9F?= =?UTF-8?q?=E8=A1=8C=E3=81=97=E3=81=AA=E3=81=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit なぜなら Docker 内にリポジトリがないから --- .devcontainer/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 38e188f5..76e7b8e8 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -30,5 +30,3 @@ ENV PATH="${HOME}/.cargo/bin:${PATH}" # mdbook と mdbook のプラグインのインストール RUN cargo install --version 0.4.48 mdbook && \ cargo install --version 1.18.0 mdbook-admonish - -CMD ["lake", "run", "build"] From e7e019419d0ccc32055397d76042a769edb21a7f Mon Sep 17 00:00:00 2001 From: Seasawher <47292598+Seasawher@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:17:38 +0900 Subject: [PATCH 4/4] =?UTF-8?q?lychee=20=E3=82=92=E9=BB=99=E3=82=89?= =?UTF-8?q?=E3=81=9B=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .lycheeignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.lycheeignore b/.lycheeignore index 77919f88..ab7eec1e 100644 --- a/.lycheeignore +++ b/.lycheeignore @@ -2,6 +2,7 @@ https://adam.math.hhu.de/#/g/leanprover-community/NNG4 https://dcbadge.limes.pink/api/server/* https://dl.acm.org/doi/10.1145/* https://dl.acm.org/doi/pdf/10.1145/* +https://hackmd.io/* https://lean-forward.github.io/ https://lean-lang.org/* https://leansearch.net/