Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
FROM mcr.microsoft.com/devcontainers/base:jammy

# Install Node.js so that the script which fixes up SEO metadata can be run.
# The NodeSource signing key is de-armored into /usr/share/keyrings/nodesource.gpg
# and the node_22.x apt source is tied to that key via `signed-by`.
# The apt package lists are deleted at the end of the same RUN instruction.
RUN apt-get update && \
apt-get install -y --no-install-recommends ca-certificates curl gnupg && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /usr/share/keyrings/nodesource.gpg && \
echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \
apt-get update && \
apt-get install -y --no-install-recommends nodejs && \
rm -rf /var/lib/apt/lists/*

# Add the user: later instructions run as `vscode`, starting in its home directory
USER vscode
WORKDIR /home/vscode
Expand Down
1 change: 1 addition & 0 deletions .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

* [mdbook](https://github.com/rust-lang/mdBook) を使用して markdown ファイルから HTML を生成しています。以下のプラグインを使用しています。
* [mdbook-admonish](https://github.com/tommilligan/mdbook-admonish) を使用してカードを表示させています。
* [Node.js](https://nodejs.org/) を、生成された HTML の SEO メタ情報を補正するスクリプトの実行に使用しています。

> [!IMPORTANT]
> 開発に使用する mdbook のバージョンは `0.4.48` に固定してください。
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ jobs:
curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y --default-toolchain none
echo "$HOME/.elan/bin" >> $GITHUB_PATH

- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: '22'

- name: Setup mdBook
uses: peaceiris/actions-mdbook@v2
with:
Expand Down
6 changes: 4 additions & 2 deletions .lycheeignore
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
https://adam.math.hhu.de/#/g/leanprover-community/NNG4
https://dcbadge.limes.pink/api/server/*
https://dl.acm.org/doi/10.1145/*
https://dl.acm.org/doi/pdf/10.1145/*
https://hackmd.io/*
https://lean-forward.github.io/
https://lean-lang.org/*
https://leansearch.net/
https://live.lean-lang.org/
https://raw.githubusercontent.com/lean-ja/lean-by-example/.*
https://recharts.org/en-US
https://reservoir.lean-lang.org/
https://dl.acm.org/doi/pdf/10.1145/*
https://dl.acm.org/doi/10.1145/*
https://www.cs.cmu.edu/*
https://www.cs.ru.nl/*
1 change: 1 addition & 0 deletions lakefile.lean
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ script build do
runCmd "lake exe mdgen LeanByExample booksrc --count --exercise"
runCmd "lake exe mdgen Exe booksrc"
runCmd "mdbook build"
runCmd "node scripts/updateSeoMetadata.mjs"
return 0

end BuildScript
Expand Down
318 changes: 318 additions & 0 deletions scripts/updateSeoMetadata.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";

// ---------------------------------------------------------------------------
// Entry point: walk every HTML file under `<project>/book` and rewrite its
// <head> so that it carries consistent SEO metadata (description, canonical
// link, Open Graph and Twitter card tags). Pages that must not be indexed
// (404.html, or pages already marked noindex) are counted as skipped.
// ---------------------------------------------------------------------------

// All paths are derived from this script's own location, not from the cwd.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const projectRoot = path.resolve(scriptDir, "..");
const bookDir = path.join(projectRoot, "book");
const configPath = path.join(projectRoot, "book.toml");

// Site-wide values come from book.toml; "site-url" is mandatory, the other two
// fall back to fixed defaults when the key is absent.
const configText = fs.readFileSync(configPath, "utf8");
const siteRoot = ensureTrailingSlash(readTomlString(configText, "site-url"));
const bookTitle = readTomlString(configText, "title") ?? "Lean by Example";
const defaultDescription = readTomlString(configText, "description") ?? "";
const defaultImageUrl = new URL("image/project_image.png", siteRoot).toString();

if (!fs.existsSync(bookDir)) {
  throw new Error(`Build directory does not exist: ${bookDir}`);
}

let updatedCount = 0;
let indexedCount = 0;
let skippedCount = 0;

for (const filePath of findHtmlFiles(bookDir)) {
  const relativePath = toPosixPath(path.relative(bookDir, filePath));
  const originalHtml = fs.readFileSync(filePath, "utf8");

  let html;
  if (relativePath === "404.html") {
    html = rewriteNotFoundPage(originalHtml);
    skippedCount += 1;
  } else if (hasNoindex(originalHtml)) {
    // Already excluded from indexing: leave the page untouched.
    html = originalHtml;
    skippedCount += 1;
  } else {
    html = rewriteIndexablePage(originalHtml, relativePath);
    indexedCount += 1;
  }

  // Only touch the file on disk when something actually changed.
  if (html !== originalHtml) {
    fs.writeFileSync(filePath, html);
    updatedCount += 1;
  }
}

console.log(
  `Updated SEO metadata in ${updatedCount} HTML files (${indexedCount} indexable pages, ${skippedCount} noindex pages).`,
);

/**
 * Apply a list of `[pattern, tag]` pairs to `html` with `upsertTag`, in order.
 *
 * @param {string} html - Document to update.
 * @param {Array<[RegExp, string]>} upserts - Tags to replace or insert.
 * @returns {string} The updated document.
 */
function applyTagUpserts(html, upserts) {
  return upserts.reduce((current, [pattern, tag]) => upsertTag(current, pattern, tag), html);
}

/**
 * Produce the 404 page variant: it gets a description and a `noindex` robots
 * tag, and any canonical / Open Graph / Twitter tags are removed.
 *
 * @param {string} html - Original 404 page.
 * @returns {string} The rewritten page.
 */
function rewriteNotFoundPage(html) {
  const description = extractPageDescription(html) || "お探しのページが見つかりませんでした。";
  const withMeta = applyTagUpserts(html, [
    [
      /<meta\s+name=["']description["'][^>]*>/i,
      `<meta name="description" content="${escapeHtml(description)}">`,
    ],
    [/<meta\s+name=["']robots["'][^>]*>/i, '<meta name="robots" content="noindex">'],
  ]);

  return removeIndexableSeoTags(withMeta);
}

/**
 * Produce the indexable variant of a page: description, canonical link,
 * Open Graph tags and Twitter card tags are replaced or inserted.
 *
 * @param {string} html - Original page.
 * @param {string} relativePath - POSIX-style path of the page inside the book directory.
 * @returns {string} The rewritten page.
 */
function rewriteIndexablePage(html, relativePath) {
  const title = extractTitle(html) ?? bookTitle;
  const description = extractPageDescription(html) || defaultDescription;
  const canonicalUrl = canonicalUrlFor(relativePath);
  // Only the landing page is a "website"; every other page is an "article".
  const ogType = relativePath === "index.html" ? "website" : "article";

  const safeTitle = escapeHtml(title);
  const safeDescription = escapeHtml(description);
  const safeCanonicalUrl = escapeHtml(canonicalUrl);
  const safeImageUrl = escapeHtml(defaultImageUrl);

  return applyTagUpserts(html, [
    [
      /<meta\s+name=["']description["'][^>]*>/i,
      `<meta name="description" content="${safeDescription}">`,
    ],
    [/<link\s+rel=["']canonical["'][^>]*>/i, `<link rel="canonical" href="${safeCanonicalUrl}">`],
    [
      /<meta\s+property=["']og:site_name["'][^>]*>/i,
      `<meta property="og:site_name" content="${escapeHtml(bookTitle)}">`,
    ],
    [/<meta\s+property=["']og:title["'][^>]*>/i, `<meta property="og:title" content="${safeTitle}">`],
    [
      /<meta\s+property=["']og:description["'][^>]*>/i,
      `<meta property="og:description" content="${safeDescription}">`,
    ],
    [/<meta\s+property=["']og:type["'][^>]*>/i, `<meta property="og:type" content="${ogType}">`],
    [/<meta\s+property=["']og:url["'][^>]*>/i, `<meta property="og:url" content="${safeCanonicalUrl}">`],
    [/<meta\s+property=["']og:image["'][^>]*>/i, `<meta property="og:image" content="${safeImageUrl}">`],
    [/<meta\s+property=["']og:locale["'][^>]*>/i, '<meta property="og:locale" content="ja_JP">'],
    [
      /<meta\s+name=["']twitter:card["'][^>]*>/i,
      '<meta name="twitter:card" content="summary_large_image">',
    ],
    [/<meta\s+name=["']twitter:title["'][^>]*>/i, `<meta name="twitter:title" content="${safeTitle}">`],
    [
      /<meta\s+name=["']twitter:description["'][^>]*>/i,
      `<meta name="twitter:description" content="${safeDescription}">`,
    ],
    [/<meta\s+name=["']twitter:image["'][^>]*>/i, `<meta name="twitter:image" content="${safeImageUrl}">`],
  ]);
}

/**
 * Read the first `key = "..."` assignment (double-quoted string, key at the
 * start of a line) from TOML source text.
 *
 * Only the escapes \b \t \n \f \r \" and \\ are decoded; any other backslash
 * sequence is returned unchanged.
 *
 * @param {string} text - TOML source.
 * @param {string} key - Key name to look for (matched literally).
 * @returns {string | null} The decoded value, or null when no such assignment exists.
 */
function readTomlString(text, key) {
  const assignmentPattern = new RegExp(`^${escapeRegExp(key)}\\s*=\\s*"((?:\\\\.|[^"])*)"`, "m");
  const assignment = text.match(assignmentPattern);
  if (assignment === null) {
    return null;
  }

  // Escape letter -> decoded character. The character class below only ever
  // captures one of these keys.
  const decodedEscapes = {
    b: "\b",
    t: "\t",
    n: "\n",
    f: "\f",
    r: "\r",
    '"': '"',
    "\\": "\\",
  };
  const [, rawValue] = assignment;

  return rawValue.replace(/\\([btnfr"\\])/g, (_, char) => decodedEscapes[char]);
}

/**
 * Return `url` guaranteed to end with "/".
 *
 * @param {string | null | undefined} url - The "site-url" value read from book.toml.
 * @returns {string} `url` itself when it already ends with "/", otherwise `url` plus "/".
 * @throws {Error} When `url` is missing or empty.
 */
function ensureTrailingSlash(url) {
  const isMissing = !url;
  if (isMissing) {
    throw new Error('Missing "site-url" in book.toml');
  }

  if (url.endsWith("/")) {
    return url;
  }

  return `${url}/`;
}

/**
 * Recursively collect the paths of all regular files ending in ".html" below `dir`.
 *
 * Results follow the order returned by `fs.readdirSync`, with each
 * directory's contents placed where that directory appears.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths built by joining `dir` with each entry name.
 */
function findHtmlFiles(dir) {
  return fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
    const entryPath = path.join(dir, entry.name);

    if (entry.isDirectory()) {
      return findHtmlFiles(entryPath);
    }

    const isHtmlFile = entry.isFile() && entry.name.endsWith(".html");
    return isHtmlFile ? [entryPath] : [];
  });
}

/**
 * Convert a path that uses the platform separator into one that uses "/".
 *
 * @param {string} filePath - Path using `path.sep` as its separator.
 * @returns {string} The same path with every separator replaced by "/".
 */
function toPosixPath(filePath) {
  const platformSeparator = path.sep;
  return filePath.replaceAll(platformSeparator, "/");
}

/**
 * Build the absolute canonical URL of a generated page.
 *
 * "index.html" (at the root or inside a directory) is dropped so that the
 * canonical URL is the directory URL itself. The result is resolved against
 * the module-level `siteRoot`.
 *
 * @param {string} relativePath - POSIX-style path of the page relative to the book directory.
 * @returns {string} Absolute URL of the page.
 */
function canonicalUrlFor(relativePath) {
  let urlPath = relativePath;

  if (urlPath === "index.html") {
    urlPath = "";
  } else if (urlPath.endsWith("/index.html")) {
    urlPath = urlPath.slice(0, -"index.html".length);
  }

  // encodeURI deliberately leaves "?" and "#" alone because they are URL
  // delimiters. Inside a file name they are plain characters, so escape them
  // too; otherwise the rest of the name would become a query or fragment.
  const encodedPath = encodeURI(urlPath).replace(/[?#]/g, (delimiter) =>
    encodeURIComponent(delimiter),
  );

  // The "./" prefix forces relative resolution even when the first path
  // segment contains ":" (which would otherwise be parsed as a URL scheme).
  return new URL(`./${encodedPath}`, siteRoot).toString();
}

/**
 * Tell whether the document contains a robots meta tag requesting `noindex`.
 *
 * Only tags written as `<meta name="robots" content="...">` (the `name`
 * attribute first, directly followed by `content`) are recognised.
 *
 * @param {string} html - Document to inspect.
 * @returns {boolean} True when such a tag with the word "noindex" is present.
 */
function hasNoindex(html) {
  const robotsNoindexPattern =
    /<meta\s+name=["']robots["']\s+content=["'][^"']*\bnoindex\b[^"']*["'][^>]*>/i;

  return robotsNoindexPattern.exec(html) !== null;
}

/**
 * Extract the text of the first attribute-less `<title>` element.
 *
 * @param {string} html - Document to inspect.
 * @returns {string | null} The title with entities decoded and whitespace
 *   normalized, or null when no `<title>...</title>` is present.
 */
function extractTitle(html) {
  const [, rawTitle] = html.match(/<title>([\s\S]*?)<\/title>/i) ?? [];

  // A successful match always captures a (possibly empty) string.
  return rawTitle === undefined ? null : normalizeText(decodeHtmlEntities(rawTitle));
}

/**
 * Derive a page description from the first sufficiently long paragraph.
 *
 * Paragraphs of the main content are examined in document order; the first
 * one whose plain text is at least 10 UTF-16 code units long is used.
 *
 * @param {string} html - Full page HTML.
 * @returns {string} The (possibly truncated) description, or "" when no paragraph qualifies.
 */
function extractPageDescription(html) {
  const minimumLength = 10;
  const paragraphPattern = /<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/gi;
  const mainHtml = extractMainHtml(html);

  // Scan lazily: paragraphs after the first usable one are never processed.
  let paragraph = paragraphPattern.exec(mainHtml);
  while (paragraph !== null) {
    const candidate = normalizeText(toPlainText(paragraph[1]));
    if (candidate.length >= minimumLength) {
      return truncateDescription(candidate);
    }
    paragraph = paragraphPattern.exec(mainHtml);
  }

  return "";
}

/**
 * Cut the page down to the part between the `content-wrap` opening tag and
 * the `sidetoc` opening tag.
 *
 * @param {string} html - Full page HTML.
 * @returns {string} The text after the start marker up to the end marker
 *   (or to the end of the document when the end marker is missing); the whole
 *   input when the start marker is missing.
 */
function extractMainHtml(html) {
  const startMarker = '<div class="content-wrap">';
  const endMarker = '<div class="sidetoc">';

  const markerIndex = html.indexOf(startMarker);
  if (markerIndex < 0) {
    return html;
  }

  const contentStart = markerIndex + startMarker.length;
  const contentEnd = html.indexOf(endMarker, markerIndex);

  return contentEnd < 0 ? html.slice(contentStart) : html.slice(contentStart, contentEnd);
}

/**
 * Reduce an HTML fragment to plain text.
 *
 * Script and style elements, `<br>` tags and all remaining tags are each
 * replaced by a single space; entities are decoded afterwards, so escaped
 * markup such as `&lt;b&gt;` survives as literal text.
 *
 * @param {string} fragment - HTML fragment.
 * @returns {string} Text with markup removed and entities decoded (whitespace is not normalized).
 */
function toPlainText(fragment) {
  // Order matters: whole script/style elements go first, bare tags last.
  const markupPatterns = [
    /<script\b[\s\S]*?<\/script>/gi,
    /<style\b[\s\S]*?<\/style>/gi,
    /<br\s*\/?>/gi,
    /<[^>]+>/g,
  ];

  let withoutMarkup = fragment;
  for (const pattern of markupPatterns) {
    withoutMarkup = withoutMarkup.replace(pattern, " ");
  }

  return decodeHtmlEntities(withoutMarkup);
}

/**
 * Decode the HTML character references this script cares about.
 *
 * Supported: decimal (`&#12354;`) and hexadecimal (`&#x3042;`) references and
 * the named entities amp, apos, gt, lt, nbsp and quot. Any other named
 * reference is left exactly as written.
 *
 * Numeric references that do not denote a valid character (zero, a
 * surrogate, or a value above U+10FFFF) decode to U+FFFD, mirroring what
 * HTML parsers do, instead of making `String.fromCodePoint` throw.
 *
 * @param {string} text - Text possibly containing character references.
 * @returns {string} The text with the supported references decoded.
 */
function decodeHtmlEntities(text) {
  // A Map (not an object literal) so that names such as "constructor" or
  // "toString" cannot resolve to inherited Object.prototype members.
  const namedEntities = new Map([
    ["amp", "&"],
    ["apos", "'"],
    ["gt", ">"],
    ["lt", "<"],
    ["nbsp", " "],
    ["quot", '"'],
  ]);
  const replacementCharacter = "\uFFFD";

  const decodeCodePoint = (codePoint) => {
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) {
      return replacementCharacter;
    }
    return String.fromCodePoint(codePoint);
  };

  return text.replace(/&(#x[0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]+);/g, (match, entity) => {
    if (entity.startsWith("#x")) {
      return decodeCodePoint(Number.parseInt(entity.slice(2), 16));
    }
    if (entity.startsWith("#")) {
      return decodeCodePoint(Number.parseInt(entity.slice(1), 10));
    }

    return namedEntities.get(entity) ?? match;
  });
}

/**
 * Normalize whitespace in extracted text.
 *
 * Runs of whitespace collapse to one space, whitespace directly before or
 * after the punctuation marks 、 。 ! ? is removed, and the result is trimmed.
 *
 * @param {string} text - Raw text.
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  const collapsed = text.replace(/\s+/g, " ");
  const tightBefore = collapsed.replace(/\s+([、。!?])/g, "$1");
  const tightAfter = tightBefore.replace(/([、。!?])\s+/g, "$1");

  return tightAfter.trim();
}

/**
 * Limit a description to 160 code points.
 *
 * Longer text is cut to 157 code points, stripped of trailing whitespace and
 * suffixed with "...". Counting by code point keeps surrogate pairs intact.
 *
 * @param {string} text - Description text.
 * @returns {string} `text` unchanged when short enough, otherwise the shortened form.
 */
function truncateDescription(text) {
  const maxLength = 160;
  const ellipsis = "...";
  const codePoints = [...text];

  if (codePoints.length > maxLength) {
    const kept = codePoints.slice(0, maxLength - ellipsis.length).join("");
    return `${kept.trimEnd()}${ellipsis}`;
  }

  return text;
}

/**
 * Replace the first tag matching `pattern` with `tag`, or insert `tag` just
 * before `</head>` when nothing matches.
 *
 * `tag` is always inserted verbatim. It is supplied through a replacer
 * function because a replacement *string* would have sequences such as `$&`,
 * `$$`, `` $` `` or `$'` expanded by `String.prototype.replace`, corrupting
 * tags whose content (page titles, descriptions) happens to contain "$".
 *
 * @param {string} html - Document to update.
 * @param {RegExp} pattern - Pattern locating an existing tag.
 * @param {string} tag - Complete tag to write.
 * @returns {string} The updated document; unchanged when neither `pattern`
 *   nor `</head>` is found.
 */
function upsertTag(html, pattern, tag) {
  if (pattern.test(html)) {
    return html.replace(pattern, () => tag);
  }

  return html.replace(/<\/head>/i, () => ` ${tag}\n </head>`);
}

/**
 * Strip the tags that only make sense on indexable pages: the canonical
 * link, every `og:*` meta property and every `twitter:*` meta name.
 *
 * Each removal also consumes the whitespace (and one preceding newline) in
 * front of the tag, so no blank lines are left behind.
 *
 * @param {string} html - Document to clean.
 * @returns {string} The document without those tags.
 */
function removeIndexableSeoTags(html) {
  const removablePatterns = [
    /\n?\s*<link\s+rel=["']canonical["'][^>]*>/gi,
    /\n?\s*<meta\s+property=["']og:[^"']+["'][^>]*>/gi,
    /\n?\s*<meta\s+name=["']twitter:[^"']+["'][^>]*>/gi,
  ];

  let cleaned = html;
  for (const pattern of removablePatterns) {
    cleaned = cleaned.replace(pattern, "");
  }

  return cleaned;
}

/**
 * Escape text for use inside a double-quoted HTML attribute value.
 *
 * Replaces `&`, `"`, `<` and `>` with their entities. Single quotes are left as they are.
 *
 * @param {string} value - Raw text.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  // "&" must be handled first so the entities produced below are not re-escaped.
  const substitutions = [
    [/&/g, "&amp;"],
    [/"/g, "&quot;"],
    [/</g, "&lt;"],
    [/>/g, "&gt;"],
  ];

  return substitutions.reduce((escaped, [pattern, entity]) => escaped.replace(pattern, entity), value);
}

/**
 * Escape regular-expression metacharacters so `value` can be embedded in a
 * `RegExp` source and match literally.
 *
 * @param {string} value - Literal text.
 * @returns {string} `value` with each metacharacter preceded by a backslash.
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" stands for the matched character itself.
  const backslashPrefixed = "\\$&";

  return value.replace(metacharacters, backslashPrefixed);
}
Loading
Loading