Files
cpp-flashcards/leetcode/extract.mjs
T
tomatocream eabb433ec6 Add solution notes scaffold and sub-heading format
- roadmap.org: problems now have *** Python and *** C++ sub-headings,
  plus Notes: links to per-problem org files
- scaffold-notes.mjs: creates 199 note files in org/cpp/dsa/<topic>/
  with backlinks to roadmap.org
- 18 topic folders under org/cpp/dsa/ for NeetCode 150 problems
- Updated AGENTS.md with new conventions and workflow
2026-06-01 02:33:30 +08:00

367 lines
12 KiB
JavaScript

#!/usr/bin/env node
/**
* NeetCode Roadmap Extractor
*
* Fetches the NeetCode roadmap data (dependency graph + problems)
* from the live site and outputs structured JSON, DOT, and org-mode.
*
* Idempotent apart from the embedded extraction date: the same downloaded
* input always produces the same output on a given day.
*
* Usage:
* node extract.mjs # write to ./out/
* node extract.mjs --stdout # print JSON to stdout
* node extract.mjs --cache /tmp/nc # cache downloads in dir
*/
import { writeFileSync, mkdirSync, readFileSync, existsSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
// Directory containing this script; default cache and output paths hang off it.
const __dirname = dirname(fileURLToPath(import.meta.url));
// ── Config ──────────────────────────────────────────────────────────────────
// Site root that the roadmap page and its JS bundles are fetched from.
const BASE = "https://neetcode.io";
const ROADMAP_CHUNK_ID = 8998; // exports ROADMAP_ROUTES
const GRAPH_DATA_CHUNK_ID = 7669; // contains the actual graph nodes
// Prefixes used when building links in the org-mode output.
const LEETCODE_BASE = "https://leetcode.com/problems/";
const GITHUB_SOLUTIONS =
  "https://github.com/neetcode-gh/leetcode/blob/main/";

/**
 * Resolve the cache directory from the command-line arguments.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @param {string} fallback - Directory to use when `--cache` is not given.
 * @returns {string} The directory following `--cache`, or `fallback`.
 * @throws {Error} When `--cache` is present but not followed by a directory
 *   (end of arguments, empty string, or another `--flag`). Failing here gives
 *   a clear message instead of a TypeError from `join()` at first fetch.
 */
function resolveCacheDir(argv, fallback) {
  const flagIndex = argv.indexOf("--cache");
  if (flagIndex === -1) return fallback;
  const value = argv[flagIndex + 1];
  if (!value || value.startsWith("--")) {
    throw new Error("--cache requires a directory argument");
  }
  return value;
}

const args = process.argv.slice(2);
// When set, the full JSON result is printed instead of writing files.
const stdoutMode = args.includes("--stdout");
const cacheDir = resolveCacheDir(args, join(__dirname, ".cache"));
const outDir = join(__dirname, "out");
// ── Fetch with optional disk cache ──────────────────────────────────────────
/**
 * Return the body of `url` as text, using a file under `cacheDir` as a cache.
 *
 * If `cacheDir/cacheKey` already exists its contents are returned and no
 * request is made. Otherwise the URL is fetched, the body is written to that
 * path (creating parent directories as needed) and then returned.
 *
 * @param {string} url - Address to download.
 * @param {string} cacheKey - File name (relative to `cacheDir`) for the cached copy.
 * @returns {Promise<string>} The cached or freshly downloaded text.
 * @throws {Error} When the response status is not OK.
 */
async function fetchText(url, cacheKey) {
  const cachedFile = join(cacheDir, cacheKey);

  if (!existsSync(cachedFile)) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    const body = await response.text();
    mkdirSync(dirname(cachedFile), { recursive: true });
    writeFileSync(cachedFile, body, "utf8");
    return body;
  }

  return readFileSync(cachedFile, "utf8");
}
// ── Step 1: Get chunk hashes from runtime ───────────────────────────────────
/**
 * Find the hash that the runtime script associates with a chunk id.
 *
 * The id must not be preceded by another digit, so looking up 8998 does not
 * accidentally match the tail of a longer id such as 18998.
 *
 * @param {string} runtimeSrc - Source text of the runtime script.
 * @param {number|string} chunkId - Numeric chunk id to look up.
 * @returns {string|null} The hex hash, or null when the id is not present.
 */
function findChunkHash(runtimeSrc, chunkId) {
  const found = runtimeSrc.match(
    new RegExp(`(?<!\\d)${chunkId}:"([a-f0-9]+)"`)
  );
  return found ? found[1] : null;
}

/**
 * Discover the hashed file names needed to download the roadmap data.
 *
 * Fetches the roadmap page, locates the runtime script it references, and
 * reads the hashes for the roadmap and graph-data chunks from that script.
 * The main bundle's file name is taken from the page itself.
 *
 * @returns {Promise<Object>} Map of chunk id -> hash, plus `main` holding the
 *   main bundle's file name.
 * @throws {Error} When the runtime file name, a chunk hash, or the main file
 *   name cannot be found.
 */
async function getChunkHashes() {
  const html = await fetchText(`${BASE}/roadmap`, "roadmap.html");
  const runtimeMatch = html.match(/src="(runtime\.[a-f0-9]+\.js)"/);
  if (!runtimeMatch) throw new Error("Could not find runtime JS filename");
  const runtimeName = runtimeMatch[1];
  const runtime = await fetchText(`${BASE}/${runtimeName}`, runtimeName);

  const hashes = {};
  for (const id of [ROADMAP_CHUNK_ID, GRAPH_DATA_CHUNK_ID]) {
    const hash = findChunkHash(runtime, id);
    if (!hash) throw new Error(`Could not find hash for chunk ${id}`);
    hashes[id] = hash;
  }

  const mainMatch = html.match(/src="(main\.[a-f0-9]+\.js)"/);
  if (!mainMatch) throw new Error("Could not find main JS filename");
  hashes.main = mainMatch[1];
  return hashes;
}
// ── Step 2: Extract graph nodes from chunk 7669 ─────────────────────────────
/**
 * Pull the roadmap topic nodes out of the graph-data chunk source.
 *
 * Each object literal of the form
 * `{id:"N",name:"…",backgroundColor:"…"[,parentId:[…]]}` becomes one node.
 * The background colour is matched but not kept.
 *
 * @param {string} chunkSrc - Source text of the graph-data chunk.
 * @returns {{id: string, name: string, prerequisites: string[]}[]} Nodes in
 *   the order they appear; `prerequisites` holds the parent ids (empty when
 *   the node has no `parentId` list).
 */
function extractGraphNodes(chunkSrc) {
  const nodePattern =
    /\{id:"(\d+)",name:"([^"]+)",backgroundColor:"([^"]+)"(?:,parentId:\[([^\]]*)\])?\}/g;
  const unquote = (token) => token.replace(/"/g, "").trim();

  return Array.from(chunkSrc.matchAll(nodePattern), (hit) => {
    const id = hit[1];
    const name = hit[2];
    const rawParents = hit[4];
    let prerequisites = [];
    if (rawParents) {
      prerequisites = rawParents
        .split(",")
        .map((token) => unquote(token))
        .filter(Boolean);
    }
    return { id, name, prerequisites };
  });
}
// ── Step 3: Extract problems from main bundle ───────────────────────────────
/**
 * Pull problem records out of the main bundle source.
 *
 * A record starts with
 * `{problem:"…",pattern:"…",link:"…",video:"…",difficulty:"…",code:"…"`.
 * The list-membership flags (`neetcode150:!0`, `blind75:!0`, `neetcode250:!0`,
 * `premium:!0`) are looked for only in the text that follows that prefix, up
 * to 200 characters or the start of the next problem record, whichever comes
 * first. Bounding the search this way keeps a neighbouring record's flags
 * from being attributed to this one.
 *
 * @param {string} mainSrc - Source text of the main bundle.
 * @returns {Object[]} One object per problem with `name`, `pattern`,
 *   `difficulty`, `code`, `link`, optional `video`, and a `true` property for
 *   each flag found.
 */
function extractProblems(mainSrc) {
  const RECORD_START = '{problem:"';
  const FLAG_WINDOW = 200;
  const FLAGS = ["neetcode150", "blind75", "neetcode250", "premium"];
  const re =
    /\{problem:"([^"]+)",pattern:"([^"]+)",link:"([^"]+)",video:"([^"]*)",difficulty:"(\w+)",code:"([^"]+)"/g;

  const problems = [];
  for (const m of mainSrc.matchAll(re)) {
    const [, name, pattern, link, video, difficulty, code] = m;
    const obj = { name, pattern, difficulty, code, link };
    if (video) obj.video = video;

    // Only the remainder of this record may contribute flags.
    const tailStart = m.index + m[0].length;
    const nextRecord = mainSrc.indexOf(RECORD_START, tailStart);
    const tailEnd = Math.min(
      tailStart + FLAG_WINDOW,
      nextRecord === -1 ? mainSrc.length : nextRecord
    );
    const tail = mainSrc.slice(tailStart, tailEnd);

    for (const flag of FLAGS) {
      if (tail.includes(`${flag}:!0`)) obj[flag] = true;
    }
    problems.push(obj);
  }
  return problems;
}
// ── Step 4: Extract course links from chunk 7669 ────────────────────────────
/**
 * Collect the course links listed per topic in the graph-data chunk.
 *
 * A topic is recognised by `"Topic":[{course:"…",name:"…",routerLink:"…"}`.
 * Every `{course,name,routerLink}` object up to the first `]` after the
 * topic key is gathered for that topic.
 *
 * @param {string} chunkSrc - Source text of the graph-data chunk.
 * @returns {Object} Map of topic name -> array of
 *   `{course, name, routerLink}`; topics with no items are omitted.
 */
function extractCourses(chunkSrc) {
  const topicPattern =
    /"([^"]+)":\[\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g;
  const entryPattern =
    /\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g;

  const byTopic = {};
  for (const hit of chunkSrc.matchAll(topicPattern)) {
    const topic = hit[1];
    // Skip the opening quote, the topic text and the closing quote.
    const listStart = hit.index + topic.length + 2;
    const listEnd = chunkSrc.indexOf("]", listStart);
    const listSrc = chunkSrc.slice(listStart, listEnd + 1);

    const entries = Array.from(
      listSrc.matchAll(entryPattern),
      ([, course, name, routerLink]) => ({ course, name, routerLink })
    );
    if (entries.length) {
      byTopic[topic] = entries;
    }
  }
  return byTopic;
}
// ── Topological sort ────────────────────────────────────────────────────────
/**
 * Order nodes so that every node comes after its prerequisites.
 *
 * Depth-first walk starting from each node in input order. Prerequisite ids
 * with no matching node are ignored, and a node already visited is never
 * revisited.
 *
 * @param {{id: string, prerequisites: string[]}[]} nodes - Graph nodes.
 * @returns {Object[]} The same node objects, prerequisites first.
 */
function topoSort(nodes) {
  const nodeOf = Object.fromEntries(nodes.map((node) => [node.id, node]));
  const seen = new Set();
  const ordered = [];

  const walk = (id) => {
    if (seen.has(id)) {
      return;
    }
    seen.add(id);
    const current = nodeOf[id];
    if (!current) {
      return;
    }
    current.prerequisites.forEach((dep) => walk(dep));
    ordered.push(current);
  };

  nodes.forEach((node) => walk(node.id));
  return ordered;
}
// ── Build DOT graph ─────────────────────────────────────────────────────────
/**
 * Render the topic graph as Graphviz DOT text.
 *
 * Emits one labelled node statement per topic followed by one edge per
 * prerequisite (prerequisite -> topic). In labels, " / " and every space are
 * replaced by a DOT line break (`\n`).
 *
 * @param {{id: string, name: string, prerequisites: string[]}[]} nodes
 * @returns {string} DOT source ending with a newline.
 */
function buildDot(nodes) {
  const preamble = [
    "digraph NeetCodeRoadmap {",
    ' rankdir=TB;',
    ' node [shape=box, style="rounded,filled", fillcolor="#3f4bd1", fontcolor=white, fontname="Helvetica"];',
    ' edge [color="#555555", arrowsize=0.8];',
    "",
  ];

  const nodeStatements = nodes.map(({ id, name }) => {
    const label = name.replace(/ \/ /g, "\\n").replace(/ /g, "\\n");
    return ` "${id}" [label="${label}"];`;
  });

  const edgeStatements = [];
  nodes.forEach(({ id, prerequisites }) => {
    for (const dep of prerequisites) {
      edgeStatements.push(` "${dep}" -> "${id}";`);
    }
  });

  const all = [...preamble, ...nodeStatements, "", ...edgeStatements, "}"];
  return `${all.join("\n")}\n`;
}
// ── Build org-mode file ─────────────────────────────────────────────────────
/**
 * Render the roadmap as an org-mode document.
 *
 * After a fixed header (title, today's date, TODO keywords, startup option,
 * source link) each topic becomes a `* TODO` heading. Under it, every problem
 * of that topic flagged `neetcode150` becomes a `** TODO` heading tagged with
 * its difficulty, with Python and C++ sub-headings linking to the solution
 * files, a LeetCode link, an optional video link, and a link to the
 * per-problem notes file. Topics without such problems get a placeholder line.
 *
 * @param {{name: string}[]} sortedNodes - Topics in the order to emit them.
 * @param {Object} problemsByTopic - Map of topic name -> problem objects.
 * @returns {string} The org-mode text.
 */
function buildOrg(sortedNodes, problemsByTopic) {
  const today = new Date().toISOString().slice(0, 10);
  const notesRoot = "../../org/cpp/dsa";

  // Anything that is not Easy or Medium is tagged as hard.
  const difficultyTag = (difficulty) => {
    switch (difficulty) {
      case "Easy":
        return "easy";
      case "Medium":
        return "medium";
      default:
        return "hard";
    }
  };

  // Lower-case, collapse non-alphanumeric runs to "-", trim edge dashes.
  const topicSlug = (name) => {
    const dashed = name.toLowerCase().replace(/[^a-z0-9]+/g, "-");
    return dashed.replace(/(^-|-$)/g, "");
  };

  const out = [
    "#+TITLE: NeetCode Roadmap",
    `#+DATE: ${today}`,
    "#+TODO: TODO DONE",
    "#+STARTUP: overview",
    "",
    "Source: [[https://neetcode.io/roadmap][neetcode.io/roadmap]]",
    "",
  ];

  for (const topic of sortedNodes) {
    const candidates = problemsByTopic[topic.name] || [];
    const selected = candidates.filter((problem) => problem.neetcode150);
    const slug = topicSlug(topic.name);

    out.push(`* TODO ${topic.name} [/]`, "");

    if (selected.length === 0) {
      out.push(" (no NeetCode 150 problems)", "");
      continue;
    }

    selected.forEach((problem) => {
      const { code, link, video } = problem;
      const number = code.split("-")[0];
      out.push(
        `** TODO ${number}. ${problem.name} :${difficultyTag(problem.difficulty)}:`,
        `*** Python`,
        `- [[${GITHUB_SOLUTIONS}python/${code}.py][${code}.py]]`,
        `*** C++`,
        `- [[${GITHUB_SOLUTIONS}cpp/${code}.cpp][${code}.cpp]]`,
        `- LeetCode: [[${LEETCODE_BASE}${link}][${link}]]`
      );
      if (video) {
        out.push(
          `- Video: [[https://youtube.com/watch?v=${video}][explanation]]`
        );
      }
      out.push(`- Notes: [[${notesRoot}/${slug}/${code}.org][My Solution]]`);
    });
    out.push("");
  }

  return out.join("\n");
}
// ── Main ────────────────────────────────────────────────────────────────────
/**
 * Run the whole extraction: download the bundles, parse nodes, problems and
 * courses, then either print the full JSON result (`--stdout`) or write
 * `roadmap.json`, `roadmap-neetcode150.json`, `roadmap.dot` and `roadmap.org`
 * into the output directory and log a short summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const hashes = await getChunkHashes();
  const graphChunkFile = `${GRAPH_DATA_CHUNK_ID}.${hashes[GRAPH_DATA_CHUNK_ID]}.js`;
  const [graphChunk, mainSrc] = await Promise.all([
    fetchText(`${BASE}/${graphChunkFile}`, graphChunkFile),
    fetchText(`${BASE}/${hashes.main}`, hashes.main),
  ]);

  const nodes = extractGraphNodes(graphChunk);
  const problems = extractProblems(mainSrc);
  const courses = extractCourses(graphChunk);

  // One edge per (prerequisite -> topic) pair.
  const edges = nodes.flatMap((node) =>
    node.prerequisites.map((dep) => ({
      from: dep,
      to: node.id,
      meaning: "prerequisite",
    }))
  );

  // Bucket problems under their `pattern` (the topic name), keeping order.
  const groupByPattern = (list) => {
    const groups = {};
    for (const item of list) {
      (groups[item.pattern] ||= []).push(item);
    }
    return groups;
  };

  const problemsByTopic = groupByPattern(problems);
  const nc150Problems = problems.filter((problem) => problem.neetcode150);
  const nc150ByTopic = groupByPattern(nc150Problems);

  // Prerequisites-first ordering is used only for the org output.
  const sorted = topoSort(nodes);

  const result = {
    source: "https://neetcode.io/roadmap",
    extracted: new Date().toISOString().slice(0, 10),
    graph: { nodes, edges },
    problemsByTopic,
    coursesByTopic: courses,
    stats: {
      topics: nodes.length,
      edges: edges.length,
      totalProblems: problems.length,
      neetcode150: nc150Problems.length,
    },
  };

  const toJson = (value) => JSON.stringify(value, null, 2) + "\n";

  if (stdoutMode) {
    process.stdout.write(toJson(result));
    return;
  }

  mkdirSync(outDir, { recursive: true });
  const emit = (fileName, contents) =>
    writeFileSync(join(outDir, fileName), contents, "utf8");

  // Full data
  emit("roadmap.json", toJson(result));

  // NeetCode 150 only
  emit(
    "roadmap-neetcode150.json",
    toJson({
      source: result.source,
      extracted: result.extracted,
      graph: result.graph,
      problemsByTopic: nc150ByTopic,
      coursesByTopic: courses,
      stats: {
        topics: nodes.length,
        edges: edges.length,
        problems: nc150Problems.length,
      },
    })
  );

  // DOT
  emit("roadmap.dot", buildDot(nodes));

  // Org-mode
  emit("roadmap.org", buildOrg(sorted, problemsByTopic));

  const { stats } = result;
  console.log(`Wrote ${outDir}/roadmap.json (${stats.totalProblems} problems total)`);
  console.log(`Wrote ${outDir}/roadmap-neetcode150.json (${stats.neetcode150} problems)`);
  console.log(`Wrote ${outDir}/roadmap.dot`);
  console.log(`Wrote ${outDir}/roadmap.org`);
  console.log(` ${stats.topics} topics, ${stats.edges} edges`);
}
// Script entry point: any failure is printed and turned into exit status 1.
function exitWithError(failure) {
  console.error(failure);
  process.exit(1);
}

main().catch(exitWithError);