feat: add NeetCode roadmap extractor with dependency graph
- extract.mjs: idempotent script that fetches neetcode.io JS chunks and extracts the topic dependency graph (18 topics, 21 edges) and problems (965 total, 199 NeetCode 150)
- out/roadmap.json: full data (graph + all problems + courses)
- out/roadmap-neetcode150.json: filtered to NeetCode 150 only
- out/roadmap.dot: Graphviz visualization
- out/roadmap.org: org-mode with TODO checklists, Python/C++ links
- neetcode-roadmap-graph.json: standalone edge list
- neetcode-roadmap.dot: standalone DOT file

Also reformats subarray table in qn_00.org
This commit is contained in:
@@ -0,0 +1,353 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
 * NeetCode Roadmap Extractor
 *
 * Fetches the NeetCode roadmap data (dependency graph + problems)
 * from the live site and outputs structured JSON, DOT, and org-mode.
 *
 * Idempotent: the same downloaded input always produces the same output,
 * apart from the extraction date stamped into the JSON and org files.
 *
 * Usage:
 *   node extract.mjs                   # write to out/ next to this script
 *   node extract.mjs --stdout          # print JSON to stdout
 *   node extract.mjs --cache /tmp/nc   # cache downloads in dir
 *                                      # (default: .cache/ next to this script)
 */
|
||||
|
||||
import { writeFileSync, mkdirSync, readFileSync, existsSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));

// ── Config ──────────────────────────────────────────────────────────────────

const BASE = "https://neetcode.io";
const ROADMAP_CHUNK_ID = 8998; // exports ROADMAP_ROUTES
const GRAPH_DATA_CHUNK_ID = 7669; // contains the actual graph nodes
const LEETCODE_BASE = "https://leetcode.com/problems/";
const GITHUB_SOLUTIONS = "https://github.com/neetcode-gh/leetcode/blob/main/";

/**
 * Pick the download cache directory from the CLI arguments.
 *
 * @param {string[]} argv - Command-line arguments (without node/script).
 * @param {string} fallbackDir - Directory used when `--cache` is absent.
 * @returns {string} The directory following `--cache`, or `fallbackDir`.
 * @throws {Error} When `--cache` is not followed by a directory. Without this
 *   check the value would be `undefined` (or the next flag) and the failure
 *   would only surface later as an unrelated path error.
 */
function resolveCacheDir(argv, fallbackDir) {
  const flagIndex = argv.indexOf("--cache");
  if (flagIndex === -1) return fallbackDir;

  const value = argv[flagIndex + 1];
  if (value === undefined || value === "" || value.startsWith("--")) {
    throw new Error(
      "--cache requires a directory argument, e.g. --cache /tmp/nc"
    );
  }
  return value;
}

const args = process.argv.slice(2);
const stdoutMode = args.includes("--stdout");
const cacheDir = resolveCacheDir(args, join(__dirname, ".cache"));
const outDir = join(__dirname, "out");
|
||||
|
||||
// ── Fetch with optional disk cache ──────────────────────────────────────────
|
||||
|
||||
/**
 * Return the body of `url` as text, going through the on-disk cache.
 *
 * A file named `cacheKey` inside `cacheDir` is returned as-is when it exists;
 * otherwise the URL is downloaded, stored under that name, and returned.
 *
 * @param {string} url - Address to download on a cache miss.
 * @param {string} cacheKey - File name (relative to `cacheDir`) for the cached copy.
 * @returns {Promise<string>} The cached or freshly downloaded text.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function fetchText(url, cacheKey) {
  const cachedFile = join(cacheDir, cacheKey);

  // Cache miss: download, persist, and hand back the fresh body.
  if (!existsSync(cachedFile)) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    const body = await response.text();
    mkdirSync(dirname(cachedFile), { recursive: true });
    writeFileSync(cachedFile, body, "utf8");
    return body;
  }

  return readFileSync(cachedFile, "utf8");
}
|
||||
|
||||
// ── Step 1: Get chunk hashes from runtime ───────────────────────────────────
|
||||
|
||||
/**
 * Discover the hashed bundle names needed by the extractor.
 *
 * Reads the roadmap page to find the runtime and main bundle file names, then
 * looks up the content hash of each wanted chunk id in the runtime script.
 *
 * @returns {Promise<Object>} Map of chunk id -> hash for ROADMAP_CHUNK_ID and
 *   GRAPH_DATA_CHUNK_ID, plus `main` holding the full main bundle file name
 *   (e.g. "main.<hash>.js").
 * @throws {Error} When the runtime/main file name or a chunk hash is not found.
 */
async function getChunkHashes() {
  const html = await fetchText(`${BASE}/roadmap`, "roadmap.html");
  const runtimeMatch = html.match(/src="(runtime\.[a-f0-9]+\.js)"/);
  if (!runtimeMatch) throw new Error("Could not find runtime JS filename");
  const runtimeName = runtimeMatch[1];
  const runtime = await fetchText(`${BASE}/${runtimeName}`, runtimeName);

  const hashes = {};
  for (const id of [ROADMAP_CHUNK_ID, GRAPH_DATA_CHUNK_ID]) {
    // The lookbehind keeps the id from matching as the tail of a longer key
    // (e.g. 8998 inside 18998), which would silently return the wrong hash.
    const m = runtime.match(new RegExp(`(?<![\\w$])${id}:"([a-f0-9]+)"`));
    if (!m) throw new Error(`Could not find hash for chunk ${id}`);
    hashes[id] = m[1];
  }

  const mainMatch = html.match(/src="(main\.[a-f0-9]+\.js)"/);
  if (!mainMatch) throw new Error("Could not find main JS filename");
  hashes.main = mainMatch[1];

  return hashes;
}
|
||||
|
||||
// ── Step 2: Extract graph nodes from chunk 7669 ─────────────────────────────
|
||||
|
||||
/**
 * Collect the roadmap topic nodes found in the graph-data chunk source.
 *
 * Each node literal has the shape
 * `{id:"…",name:"…",backgroundColor:"…"[,parentId:[…]]}`; the background
 * colour is matched but not kept.
 *
 * @param {string} chunkSrc - Source text of the graph-data chunk.
 * @returns {{id: string, name: string, prerequisites: string[]}[]} Nodes in
 *   source order; `prerequisites` lists the ids from `parentId` (empty if none).
 */
function extractGraphNodes(chunkSrc) {
  const nodePattern =
    /\{id:"(\d+)",name:"([^"]+)",backgroundColor:"([^"]+)"(?:,parentId:\[([^\]]*)\])?\}/g;
  const found = [];

  for (
    let hit = nodePattern.exec(chunkSrc);
    hit !== null;
    hit = nodePattern.exec(chunkSrc)
  ) {
    const rawParents = hit[4];
    // A missing or empty parentId list both end up as no prerequisites.
    const prerequisites = (rawParents || "")
      .split(",")
      .map((token) => token.replace(/"/g, "").trim())
      .filter(Boolean);
    found.push({ id: hit[1], name: hit[2], prerequisites });
  }

  return found;
}
|
||||
|
||||
// ── Step 3: Extract problems from main bundle ───────────────────────────────
|
||||
|
||||
/**
 * Return the remainder of an object literal, starting just after an already
 * matched prefix and stopping before the brace that closes that object.
 *
 * Quoted strings are skipped and nested braces are balanced so a `}` inside a
 * string or nested literal does not end the scan early.
 *
 * @param {string} src - Full source text.
 * @param {number} start - Index just past the matched prefix.
 * @param {number} [maxScan=2000] - Safety bound on how far to look.
 * @returns {string} Text between `start` and the closing brace (or the bound).
 */
function sliceObjectTail(src, start, maxScan = 2000) {
  const limit = Math.min(src.length, start + maxScan);
  let depth = 0;
  let quote = null;

  for (let i = start; i < limit; i += 1) {
    const ch = src[i];
    if (quote !== null) {
      if (ch === "\\") {
        i += 1; // skip the escaped character
      } else if (ch === quote) {
        quote = null;
      }
    } else if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      if (depth === 0) return src.slice(start, i);
      depth -= 1;
    }
  }
  return src.slice(start, limit);
}

/**
 * Extract problem records from the main bundle source.
 *
 * Matches object literals beginning with
 * `{problem:"…",pattern:"…",link:"…",video:"…",difficulty:"…",code:"…"` and
 * reads the boolean list flags (`neetcode150`, `blind75`, `neetcode250`,
 * `premium`) from the rest of that same literal only. Looking at a fixed
 * window around the match instead would pick up flags belonging to the
 * neighbouring problems and over-count list membership.
 *
 * @param {string} mainSrc - Source text of the main bundle.
 * @returns {Object[]} Problems in source order with `name`, `pattern`,
 *   `difficulty`, `code`, `link`, optional `video`, and any flags set to true.
 */
function extractProblems(mainSrc) {
  const problems = [];
  const re =
    /\{problem:"([^"]+)",pattern:"([^"]+)",link:"([^"]+)",video:"([^"]*)",difficulty:"(\w+)",code:"([^"]+)"/g;
  let m;
  while ((m = re.exec(mainSrc))) {
    const [, name, pattern, link, video, difficulty, code] = m;
    const obj = { name, pattern, difficulty, code, link };
    if (video) obj.video = video;

    // Only the tail of this very object may contribute flags.
    const tail = sliceObjectTail(mainSrc, m.index + m[0].length);
    if (/\bneetcode150:!0/.test(tail)) obj.neetcode150 = true;
    if (/\bblind75:!0/.test(tail)) obj.blind75 = true;
    if (/\bneetcode250:!0/.test(tail)) obj.neetcode250 = true;
    if (/\bpremium:!0/.test(tail)) obj.premium = true;

    problems.push(obj);
  }
  return problems;
}
|
||||
|
||||
// ── Step 4: Extract course links from chunk 7669 ────────────────────────────
|
||||
|
||||
/**
 * Collect the course links listed per topic in the graph-data chunk source.
 *
 * A topic entry looks like `"Topic":[{course:"…",name:"…",routerLink:"…"},…]`;
 * every item up to the first `]` after the topic key is gathered.
 *
 * @param {string} chunkSrc - Source text of the graph-data chunk.
 * @returns {Object} Map of topic name -> array of
 *   `{course, name, routerLink}`; topics without items are omitted.
 */
function extractCourses(chunkSrc) {
  const topicPattern =
    /"([^"]+)":\[\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g;
  const coursesByTopic = {};

  for (
    let hit = topicPattern.exec(chunkSrc);
    hit !== null;
    hit = topicPattern.exec(chunkSrc)
  ) {
    const topic = hit[1];
    // Skip over the quoted key; the slice then runs through the first "]".
    const listStart = hit.index + topic.length + 2;
    const listEnd = chunkSrc.indexOf("]", listStart);
    const listSrc = chunkSrc.slice(listStart, listEnd + 1);

    const entries = Array.from(
      listSrc.matchAll(
        /\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g
      ),
      ([, course, name, routerLink]) => ({ course, name, routerLink })
    );
    if (entries.length > 0) coursesByTopic[topic] = entries;
  }

  return coursesByTopic;
}
|
||||
|
||||
// ── Topological sort ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Order nodes so that every node comes after its prerequisites.
 *
 * Depth-first walk in input order; each id is visited at most once, and
 * prerequisite ids that do not belong to any node are ignored.
 *
 * @param {{id: string, prerequisites: string[]}[]} nodes - Graph nodes.
 * @returns {Object[]} The same node objects, prerequisites first.
 */
function topoSort(nodes) {
  const nodeById = Object.fromEntries(nodes.map((node) => [node.id, node]));
  const seen = new Set();
  const ordered = [];

  const visit = (id) => {
    if (seen.has(id)) return;
    seen.add(id);

    const node = nodeById[id];
    if (node) {
      for (const prerequisiteId of node.prerequisites) {
        visit(prerequisiteId);
      }
      // Emitted only after everything it depends on.
      ordered.push(node);
    }
  };

  nodes.forEach((node) => visit(node.id));
  return ordered;
}
|
||||
|
||||
// ── Build DOT graph ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Render the topic graph as Graphviz DOT text.
 *
 * Emits one box per node (spaces and " / " in the name become `\n` label
 * breaks) followed by one edge per prerequisite, pointing from the
 * prerequisite to the dependent node.
 *
 * @param {{id: string, name: string, prerequisites: string[]}[]} nodes
 * @returns {string} DOT source ending with a newline.
 */
function buildDot(nodes) {
  const preamble = [
    "digraph NeetCodeRoadmap {",
    ' rankdir=TB;',
    ' node [shape=box, style="rounded,filled", fillcolor="#3f4bd1", fontcolor=white, fontname="Helvetica"];',
    ' edge [color="#555555", arrowsize=0.8];',
    "",
  ];

  const nodeLines = nodes.map((node) => {
    const label = node.name.replace(/ \/ /g, "\\n").replace(/ /g, "\\n");
    return ` "${node.id}" [label="${label}"];`;
  });

  const edgeLines = nodes.flatMap((node) =>
    node.prerequisites.map((parentId) => ` "${parentId}" -> "${node.id}";`)
  );

  return [...preamble, ...nodeLines, "", ...edgeLines, "}"].join("\n") + "\n";
}
|
||||
|
||||
// ── Build org-mode file ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Render the roadmap as an org-mode checklist.
 *
 * Writes a file header stamped with today's date, then one TODO heading per
 * topic in the given order. Under each heading only problems flagged
 * `neetcode150` are listed, each with Python/C++ solution links, the LeetCode
 * link, and a video link when the problem has one.
 *
 * @param {{name: string}[]} sortedNodes - Topics in output order.
 * @param {Object} problemsByTopic - Map of topic name -> problem records.
 * @returns {string} The org-mode document.
 */
function buildOrg(sortedNodes, problemsByTopic) {
  const today = new Date().toISOString().slice(0, 10);

  // Anything that is not Easy or Medium is tagged as hard.
  const tagFor = (difficulty) => {
    switch (difficulty) {
      case "Easy":
        return "easy";
      case "Medium":
        return "medium";
      default:
        return "hard";
    }
  };

  const problemEntry = (p) => {
    const number = p.code.split("-")[0];
    const entry = [
      `- [ ] TODO ${number}. ${p.name} :${tagFor(p.difficulty)}:`,
      ` - [ ] TODO Python: [[${GITHUB_SOLUTIONS}python/${p.code}.py][${p.code}.py]]`,
      ` - [ ] TODO C++: [[${GITHUB_SOLUTIONS}cpp/${p.code}.cpp][${p.code}.cpp]]`,
      ` - LeetCode: [[${LEETCODE_BASE}${p.link}][${p.link}]]`,
    ];
    if (p.video) {
      entry.push(
        ` - Video: [[https://youtube.com/watch?v=${p.video}][explanation]]`
      );
    }
    return entry;
  };

  const out = [
    "#+TITLE: NeetCode Roadmap",
    `#+DATE: ${today}`,
    "#+TODO: TODO DONE",
    "#+STARTUP: overview",
    "",
    "Source: [[https://neetcode.io/roadmap][neetcode.io/roadmap]]",
    "",
  ];

  for (const node of sortedNodes) {
    const selected = (problemsByTopic[node.name] || []).filter(
      (p) => p.neetcode150
    );
    out.push(`* TODO ${node.name} [/]`, "");

    if (selected.length === 0) {
      out.push(" (no NeetCode 150 problems)");
    } else {
      for (const p of selected) out.push(...problemEntry(p));
    }
    out.push("");
  }

  return out.join("\n");
}
|
||||
|
||||
// ── Main ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run the extraction end to end.
 *
 * Resolves the bundle names, downloads the graph-data chunk and the main
 * bundle, extracts nodes / problems / courses, and then either prints the
 * full result as JSON (`--stdout`) or writes roadmap.json,
 * roadmap-neetcode150.json, roadmap.dot and roadmap.org into `outDir`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const hashes = await getChunkHashes();
  const graphFile = `${GRAPH_DATA_CHUNK_ID}.${hashes[GRAPH_DATA_CHUNK_ID]}.js`;

  const [graphChunk, mainSrc] = await Promise.all([
    fetchText(`${BASE}/${graphFile}`, graphFile),
    fetchText(`${BASE}/${hashes.main}`, hashes.main),
  ]);

  const nodes = extractGraphNodes(graphChunk);
  const problems = extractProblems(mainSrc);
  const courses = extractCourses(graphChunk);

  // One edge per (prerequisite -> topic) pair.
  const edges = nodes.flatMap((node) =>
    node.prerequisites.map((parentId) => ({
      from: parentId,
      to: node.id,
      meaning: "prerequisite",
    }))
  );

  // Bucket problems under their topic name, keeping source order.
  const groupByPattern = (list) => {
    const grouped = {};
    for (const item of list) {
      (grouped[item.pattern] ||= []).push(item);
    }
    return grouped;
  };

  const problemsByTopic = groupByPattern(problems);
  const nc150Problems = problems.filter((p) => p.neetcode150);
  const nc150ByTopic = groupByPattern(nc150Problems);

  // Topological order is used for the org output.
  const sorted = topoSort(nodes);

  const result = {
    source: "https://neetcode.io/roadmap",
    extracted: new Date().toISOString().slice(0, 10),
    graph: { nodes, edges },
    problemsByTopic,
    coursesByTopic: courses,
    stats: {
      topics: nodes.length,
      edges: edges.length,
      totalProblems: problems.length,
      neetcode150: nc150Problems.length,
    },
  };

  if (stdoutMode) {
    process.stdout.write(JSON.stringify(result, null, 2) + "\n");
    return;
  }

  mkdirSync(outDir, { recursive: true });
  const writeJson = (fileName, data) =>
    writeFileSync(
      join(outDir, fileName),
      JSON.stringify(data, null, 2) + "\n",
      "utf8"
    );

  // Full data
  writeJson("roadmap.json", result);

  // NeetCode 150 only
  writeJson("roadmap-neetcode150.json", {
    source: result.source,
    extracted: result.extracted,
    graph: result.graph,
    problemsByTopic: nc150ByTopic,
    coursesByTopic: courses,
    stats: {
      topics: nodes.length,
      edges: edges.length,
      problems: nc150Problems.length,
    },
  });

  // DOT
  writeFileSync(join(outDir, "roadmap.dot"), buildDot(nodes), "utf8");

  // Org-mode
  writeFileSync(
    join(outDir, "roadmap.org"),
    buildOrg(sorted, problemsByTopic),
    "utf8"
  );

  const { stats } = result;
  console.log(`Wrote ${outDir}/roadmap.json (${stats.totalProblems} problems total)`);
  console.log(`Wrote ${outDir}/roadmap-neetcode150.json (${stats.neetcode150} problems)`);
  console.log(`Wrote ${outDir}/roadmap.dot`);
  console.log(`Wrote ${outDir}/roadmap.org`);
  console.log(` ${stats.topics} topics, ${stats.edges} edges`);
}
|
||||
|
||||
// Entry point: run the extractor; on any failure log it and exit non-zero.
function reportFatal(error) {
  console.error(error);
  process.exit(1);
}

main().catch(reportFatal);
|
||||
Reference in New Issue
Block a user