feat: add NeetCode roadmap extractor with dependency graph

- extract.mjs: idempotent script that fetches neetcode.io JS chunks,
  extracts topic dependency graph (18 topics, 21 edges) and problems
  (965 total, 199 NeetCode 150)
- out/roadmap.json: full data (graph + all problems + courses)
- out/roadmap-neetcode150.json: filtered to NeetCode 150 only
- out/roadmap.dot: Graphviz visualization
- out/roadmap.org: org-mode with TODO checklists, Python/C++ links
- neetcode-roadmap-graph.json: standalone edge list
- neetcode-roadmap.dot: standalone DOT file

Also reformats subarray table in qn_00.org
This commit is contained in:
2026-06-01 02:07:20 +08:00
parent f603236a48
commit b4f25ab87b
9 changed files with 12755 additions and 21 deletions
+1
View File
@@ -0,0 +1 @@
.cache/
+353
View File
@@ -0,0 +1,353 @@
#!/usr/bin/env node
/**
* NeetCode Roadmap Extractor
*
* Fetches the NeetCode roadmap data (dependency graph + problems)
* from the live site and outputs structured JSON, DOT, and org-mode.
*
* Idempotent: re-running on the same (cached) input regenerates the same
* output, except for the extraction date embedded in the JSON and org files.
*
* Usage:
* node extract.mjs # write to ./out/
* node extract.mjs --stdout # print JSON to stdout
* node extract.mjs --cache /tmp/nc # cache downloads in dir (default: .cache/ next to this script)
*/
import { writeFileSync, mkdirSync, readFileSync, existsSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
// Directory containing this script; the default cache and output dirs live here.
const __dirname = dirname(fileURLToPath(import.meta.url));
// ── Config ──────────────────────────────────────────────────────────────────
const BASE = "https://neetcode.io";
const ROADMAP_CHUNK_ID = 8998; // exports ROADMAP_ROUTES
const GRAPH_DATA_CHUNK_ID = 7669; // contains the actual graph nodes
const LEETCODE_BASE = "https://leetcode.com/problems/";
const GITHUB_SOLUTIONS =
  "https://github.com/neetcode-gh/leetcode/blob/main/";

/**
 * Pick the download-cache directory from the CLI arguments.
 *
 * @param {string[]} argv - CLI arguments (without the node/script entries).
 * @param {string} fallback - Directory to use when `--cache` is not given.
 * @returns {string} The value following `--cache`, or `fallback`.
 * @throws {Error} When `--cache` is present but has no directory after it
 *   (it is the last argument or is followed by another `--flag`). Without
 *   this check the directory would be `undefined` (or a flag name) and the
 *   failure would only surface later as an opaque error inside `join()`.
 */
function resolveCacheDir(argv, fallback) {
  const flagAt = argv.indexOf("--cache");
  if (flagAt === -1) return fallback;
  const value = argv[flagAt + 1];
  if (value === undefined || value.startsWith("--")) {
    throw new Error(
      "--cache requires a directory argument, e.g. --cache /tmp/nc"
    );
  }
  return value;
}

const args = process.argv.slice(2);
// --stdout: print the full JSON to stdout instead of writing files to outDir.
const stdoutMode = args.includes("--stdout");
const cacheDir = resolveCacheDir(args, join(__dirname, ".cache"));
const outDir = join(__dirname, "out");
// ── Fetch with optional disk cache ──────────────────────────────────────────
/**
 * Return the body of `url` as text, backed by a file cache under `cacheDir`.
 *
 * If `<cacheDir>/<cacheKey>` already exists its contents are returned and no
 * request is made. Otherwise the URL is fetched, the body is written to that
 * path (creating parent directories as needed) and then returned.
 *
 * @param {string} url - Resource to download.
 * @param {string} cacheKey - File name (relative to `cacheDir`) for the cache entry.
 * @returns {Promise<string>} The response body or cached text.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function fetchText(url, cacheKey) {
  const cachedFile = join(cacheDir, cacheKey);
  const isCached = existsSync(cachedFile);
  if (!isCached) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    const body = await response.text();
    mkdirSync(dirname(cachedFile), { recursive: true });
    writeFileSync(cachedFile, body, "utf8");
    return body;
  }
  return readFileSync(cachedFile, "utf8");
}
// ── Step 1: Get chunk hashes from runtime ───────────────────────────────────
/**
 * Discover the hashed file names of the bundles this script reads.
 *
 * Downloads the roadmap page, finds the `runtime.<hash>.js` script it
 * references, and looks up the content hash of each known chunk id in that
 * runtime file. The `main.<hash>.js` file name is taken from the page itself.
 *
 * @returns {Promise<Object>} Map of chunk id -> hash, plus `main` -> the main
 *   bundle's file name.
 * @throws {Error} When the runtime file, a chunk hash, or the main bundle
 *   cannot be located.
 */
async function getChunkHashes() {
  const page = await fetchText(`${BASE}/roadmap`, "roadmap.html");

  const runtimeHit = /src="(runtime\.[a-f0-9]+\.js)"/.exec(page);
  if (runtimeHit === null) {
    throw new Error("Could not find runtime JS filename");
  }
  const runtimeFile = runtimeHit[1];
  const runtimeSrc = await fetchText(`${BASE}/${runtimeFile}`, runtimeFile);

  const hashes = {};
  [ROADMAP_CHUNK_ID, GRAPH_DATA_CHUNK_ID].forEach((chunkId) => {
    // The runtime maps chunk ids to hashes as `<id>:"<hex>"`.
    const found = new RegExp(`${chunkId}:"([a-f0-9]+)"`).exec(runtimeSrc);
    if (found === null) {
      throw new Error(`Could not find hash for chunk ${chunkId}`);
    }
    hashes[chunkId] = found[1];
  });

  const mainHit = /src="(main\.[a-f0-9]+\.js)"/.exec(page);
  if (mainHit === null) {
    throw new Error("Could not find main JS filename");
  }
  hashes.main = mainHit[1];
  return hashes;
}
// ── Step 2: Extract graph nodes from chunk 7669 ─────────────────────────────
/**
 * Pull the roadmap topic nodes out of the graph-data chunk source.
 *
 * Matches object literals of the form
 * `{id:"N",name:"...",backgroundColor:"..."[,parentId:[...]]}`; the
 * background colour is ignored and `parentId` becomes `prerequisites`.
 *
 * @param {string} chunkSrc - Source text of the chunk.
 * @returns {{id: string, name: string, prerequisites: string[]}[]} Nodes in
 *   the order they appear in the source.
 */
function extractGraphNodes(chunkSrc) {
  const nodePattern =
    /\{id:"(\d+)",name:"([^"]+)",backgroundColor:"([^"]+)"(?:,parentId:\[([^\]]*)\])?\}/g;

  // Turn the raw `"1","2"` list text into clean id strings.
  const parseParents = (listText) => {
    const ids = [];
    if (!listText) return ids;
    for (const piece of listText.split(",")) {
      const cleaned = piece.replace(/"/g, "").trim();
      if (cleaned) ids.push(cleaned);
    }
    return ids;
  };

  const found = [];
  for (
    let hit = nodePattern.exec(chunkSrc);
    hit !== null;
    hit = nodePattern.exec(chunkSrc)
  ) {
    found.push({
      id: hit[1],
      name: hit[2],
      prerequisites: parseParents(hit[4]),
    });
  }
  return found;
}
// ── Step 3: Extract problems from main bundle ───────────────────────────────
/**
 * Return the remainder of a problem object literal, i.e. the text between
 * `from` (just after the matched `code:"..."` field) and the `}` that closes
 * that same object.
 *
 * Braces are depth-counted and quoted strings are skipped so nested literals
 * or a `}` inside a string do not end the scan early. The scan never runs
 * past the start of the next `{problem:"` literal, so one problem's flags can
 * never be attributed to another.
 *
 * @param {string} src - Bundle source text.
 * @param {number} from - Index just past the matched head of the object.
 * @returns {string} The object's remaining property text (may be empty).
 */
function problemObjectTail(src, from) {
  const nextProblem = src.indexOf('{problem:"', from);
  const limit = nextProblem === -1 ? src.length : nextProblem;
  let depth = 1; // we start inside the object opened by `{problem:`
  let quote = "";
  for (let i = from; i < limit; i += 1) {
    const ch = src[i];
    if (quote) {
      if (ch === "\\") i += 1; // skip the escaped character
      else if (ch === quote) quote = "";
    } else if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) return src.slice(from, i);
    }
  }
  return src.slice(from, limit);
}

/**
 * Extract problem records from the main bundle source.
 *
 * Each record starts with the fields
 * `{problem:"..",pattern:"..",link:"..",video:"..",difficulty:"..",code:".."`;
 * boolean list-membership flags (`neetcode150:!0`, `blind75:!0`,
 * `neetcode250:!0`, `premium:!0`) are read from the rest of that same object.
 *
 * The flags used to be searched in a fixed window (50 chars before, 200 chars
 * after the match), which picked up flags from adjacent problem objects and
 * missed flags further than 200 chars away. The search is now bounded to the
 * problem's own object literal.
 *
 * @param {string} mainSrc - Source text of the main bundle.
 * @returns {Object[]} Problems in source order, each with `name`, `pattern`,
 *   `difficulty`, `code`, `link`, plus optional `video` and boolean flags.
 */
function extractProblems(mainSrc) {
  const problemRe =
    /\{problem:"([^"]+)",pattern:"([^"]+)",link:"([^"]+)",video:"([^"]*)",difficulty:"(\w+)",code:"([^"]+)"/g;
  const problems = [];
  for (const m of mainSrc.matchAll(problemRe)) {
    const [head, name, pattern, link, video, difficulty, code] = m;
    const obj = { name, pattern, difficulty, code, link };
    if (video) obj.video = video;
    const tail = problemObjectTail(mainSrc, m.index + head.length);
    if (/neetcode150:!0/.test(tail)) obj.neetcode150 = true;
    if (/blind75:!0/.test(tail)) obj.blind75 = true;
    if (/neetcode250:!0/.test(tail)) obj.neetcode250 = true;
    if (/premium:!0/.test(tail)) obj.premium = true;
    problems.push(obj);
  }
  return problems;
}
// ── Step 4: Extract course links from chunk 7669 ────────────────────────────
/**
 * Collect the course links listed per topic in the graph-data chunk.
 *
 * Looks for `"<topic>":[{course:"..",name:"..",routerLink:".."}` and then
 * reads every `{course,name,routerLink}` item up to the first `]` that
 * follows the topic key.
 *
 * @param {string} chunkSrc - Source text of the chunk.
 * @returns {Object} Map of topic name -> array of
 *   `{course, name, routerLink}`; topics without items are omitted.
 */
function extractCourses(chunkSrc) {
  const topicPattern =
    /"([^"]+)":\[\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g;
  const coursesByTopic = {};

  for (
    let hit = topicPattern.exec(chunkSrc);
    hit !== null;
    hit = topicPattern.exec(chunkSrc)
  ) {
    const [, topic] = hit;
    // Slice from just after the quoted key through the first closing bracket.
    const sliceFrom = hit.index + topic.length + 2;
    const closing = chunkSrc.indexOf("]", sliceFrom);
    const listText = chunkSrc.slice(sliceFrom, closing + 1);

    const entries = Array.from(
      listText.matchAll(
        /\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g
      ),
      ([, course, name, routerLink]) => ({ course, name, routerLink })
    );
    if (entries.length > 0) coursesByTopic[topic] = entries;
  }
  return coursesByTopic;
}
// ── Topological sort ────────────────────────────────────────────────────────
/**
 * Order nodes so that every node comes after its prerequisites.
 *
 * Depth-first walk over `prerequisites`, emitting a node once all of its
 * prerequisites have been walked. Ids already seen are skipped (so a cycle
 * cannot loop forever) and prerequisite ids with no matching node are ignored.
 *
 * @param {{id: string, prerequisites: string[]}[]} nodes - Graph nodes.
 * @returns {Object[]} The same node objects in dependency order.
 */
function topoSort(nodes) {
  const nodeById = Object.fromEntries(nodes.map((node) => [node.id, node]));
  const seen = new Set();
  const ordered = [];

  const walk = (id) => {
    if (seen.has(id)) return;
    seen.add(id);
    const current = nodeById[id];
    if (current) {
      for (const prerequisite of current.prerequisites) walk(prerequisite);
      ordered.push(current);
    }
  };

  for (const { id } of nodes) walk(id);
  return ordered;
}
// ── Build DOT graph ─────────────────────────────────────────────────────────
/**
 * Render the topic graph as Graphviz DOT source.
 *
 * Emits one node statement per topic (spaces and " / " in the name become
 * `\n` line breaks in the label) followed by one `prerequisite -> topic`
 * edge per prerequisite.
 *
 * @param {{id: string, name: string, prerequisites: string[]}[]} nodes
 * @returns {string} DOT text terminated by a newline.
 */
function buildDot(nodes) {
  const preamble = [
    "digraph NeetCodeRoadmap {",
    ' rankdir=TB;',
    ' node [shape=box, style="rounded,filled", fillcolor="#3f4bd1", fontcolor=white, fontname="Helvetica"];',
    ' edge [color="#555555", arrowsize=0.8];',
    "",
  ];

  const nodeStatements = nodes.map((node) => {
    const withoutSlashes = node.name.replace(/ \/ /g, "\\n");
    const label = withoutSlashes.replace(/ /g, "\\n");
    return ` "${node.id}" [label="${label}"];`;
  });

  const edgeStatements = nodes.flatMap((node) =>
    node.prerequisites.map((pre) => ` "${pre}" -> "${node.id}";`)
  );

  const all = [...preamble, ...nodeStatements, "", ...edgeStatements, "}"];
  return all.join("\n") + "\n";
}
// ── Build org-mode file ─────────────────────────────────────────────────────
/**
 * Render an org-mode checklist of the NeetCode 150 problems, one top-level
 * heading per topic in the order given.
 *
 * Only problems whose `neetcode150` flag is truthy are listed. Each problem
 * gets checkbox items for Python and C++ solution links, a LeetCode link and,
 * when a video id is present, a YouTube link. The current date (UTC,
 * YYYY-MM-DD) is written to the `#+DATE:` header.
 *
 * @param {{name: string}[]} sortedNodes - Topics in output order.
 * @param {Object} problemsByTopic - Map of topic name -> problem records.
 * @returns {string} The org-mode document.
 */
function buildOrg(sortedNodes, problemsByTopic) {
  const today = new Date().toISOString().slice(0, 10);

  // Anything that is not Easy or Medium is tagged as hard.
  const tagFor = (difficulty) => {
    if (difficulty === "Easy") return "easy";
    if (difficulty === "Medium") return "medium";
    return "hard";
  };

  const out = [
    "#+TITLE: NeetCode Roadmap",
    `#+DATE: ${today}`,
    "#+TODO: TODO DONE",
    "#+STARTUP: overview",
    "",
    "Source: [[https://neetcode.io/roadmap][neetcode.io/roadmap]]",
    "",
  ];

  for (const topic of sortedNodes) {
    const candidates = problemsByTopic[topic.name] || [];
    const picked = candidates.filter((problem) => problem.neetcode150);
    out.push(`* TODO ${topic.name} [/]`, "");

    if (picked.length === 0) {
      out.push(" (no NeetCode 150 problems)", "");
      continue;
    }

    for (const problem of picked) {
      const { code, link, video } = problem;
      const [number] = code.split("-");
      out.push(
        `- [ ] TODO ${number}. ${problem.name} :${tagFor(problem.difficulty)}:`,
        ` - [ ] TODO Python: [[${GITHUB_SOLUTIONS}python/${code}.py][${code}.py]]`,
        ` - [ ] TODO C++: [[${GITHUB_SOLUTIONS}cpp/${code}.cpp][${code}.cpp]]`,
        ` - LeetCode: [[${LEETCODE_BASE}${link}][${link}]]`
      );
      if (video) {
        out.push(
          ` - Video: [[https://youtube.com/watch?v=${video}][explanation]]`
        );
      }
    }
    out.push("");
  }

  return out.join("\n");
}
// ── Main ────────────────────────────────────────────────────────────────────
/**
 * Run the extraction end to end.
 *
 * Resolves the current chunk file names, downloads the graph-data chunk and
 * the main bundle, extracts nodes, problems and courses, then either prints
 * the full result as JSON (`--stdout`) or writes `roadmap.json`,
 * `roadmap-neetcode150.json`, `roadmap.dot` and `roadmap.org` into `outDir`
 * and logs a short summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const hashes = await getChunkHashes();
  const graphFile = `${GRAPH_DATA_CHUNK_ID}.${hashes[GRAPH_DATA_CHUNK_ID]}.js`;
  const [graphChunk, mainSrc] = await Promise.all([
    fetchText(`${BASE}/${graphFile}`, graphFile),
    fetchText(`${BASE}/${hashes.main}`, hashes.main),
  ]);

  const nodes = extractGraphNodes(graphChunk);
  const problems = extractProblems(mainSrc);
  const courses = extractCourses(graphChunk);

  // One edge per (prerequisite -> topic) pair.
  const edges = nodes.flatMap((node) =>
    node.prerequisites.map((pre) => ({
      from: pre,
      to: node.id,
      meaning: "prerequisite",
    }))
  );

  // Bucket problems under their `pattern` (topic name), keeping source order.
  const groupByPattern = (list) => {
    const grouped = {};
    for (const item of list) {
      if (!grouped[item.pattern]) grouped[item.pattern] = [];
      grouped[item.pattern].push(item);
    }
    return grouped;
  };

  const problemsByTopic = groupByPattern(problems);
  const nc150Problems = problems.filter((problem) => problem.neetcode150);
  const nc150ByTopic = groupByPattern(nc150Problems);

  // Dependency order is only needed for the org output.
  const sorted = topoSort(nodes);

  const result = {
    source: "https://neetcode.io/roadmap",
    extracted: new Date().toISOString().slice(0, 10),
    graph: { nodes, edges },
    problemsByTopic,
    coursesByTopic: courses,
    stats: {
      topics: nodes.length,
      edges: edges.length,
      totalProblems: problems.length,
      neetcode150: nc150Problems.length,
    },
  };

  const toJson = (value) => JSON.stringify(value, null, 2) + "\n";

  if (stdoutMode) {
    process.stdout.write(toJson(result));
    return;
  }

  mkdirSync(outDir, { recursive: true });
  const writeOut = (fileName, text) =>
    writeFileSync(join(outDir, fileName), text, "utf8");

  // Full data
  writeOut("roadmap.json", toJson(result));

  // NeetCode 150 only: same graph, filtered problem lists.
  const nc150Result = {
    source: result.source,
    extracted: result.extracted,
    graph: result.graph,
    problemsByTopic: nc150ByTopic,
    coursesByTopic: courses,
    stats: {
      topics: nodes.length,
      edges: edges.length,
      problems: nc150Problems.length,
    },
  };
  writeOut("roadmap-neetcode150.json", toJson(nc150Result));

  // DOT
  writeOut("roadmap.dot", buildDot(nodes));

  // Org-mode
  writeOut("roadmap.org", buildOrg(sorted, problemsByTopic));

  const { stats } = result;
  console.log(`Wrote ${outDir}/roadmap.json (${stats.totalProblems} problems total)`);
  console.log(`Wrote ${outDir}/roadmap-neetcode150.json (${stats.neetcode150} problems)`);
  console.log(`Wrote ${outDir}/roadmap.dot`);
  console.log(`Wrote ${outDir}/roadmap.org`);
  console.log(` ${stats.topics} topics, ${stats.edges} edges`);
}
// Entry point: run the extractor; on any failure print the error and exit
// with a non-zero status.
function reportAndExit(failure) {
  console.error(failure);
  process.exit(1);
}
main().catch(reportAndExit);
+47
View File
@@ -0,0 +1,47 @@
{
"source": "https://neetcode.io/roadmap",
"extracted": "2026-06-01",
"nodes": [
{ "id": "1", "name": "Arrays & Hashing" },
{ "id": "2", "name": "Two Pointers" },
{ "id": "3", "name": "Stack" },
{ "id": "4", "name": "Sliding Window" },
{ "id": "5", "name": "Linked List" },
{ "id": "6", "name": "Binary Search" },
{ "id": "7", "name": "Trees" },
{ "id": "8", "name": "Tries" },
{ "id": "9", "name": "Heap / Priority Queue" },
{ "id": "10", "name": "Backtracking" },
{ "id": "11", "name": "Graphs" },
{ "id": "12", "name": "1-D Dynamic Programming" },
{ "id": "13", "name": "Intervals" },
{ "id": "14", "name": "2-D Dynamic Programming" },
{ "id": "15", "name": "Bit Manipulation" },
{ "id": "16", "name": "Greedy" },
{ "id": "17", "name": "Advanced Graphs" },
{ "id": "18", "name": "Math & Geometry" }
],
"edges": [
{ "from": "1", "to": "2", "meaning": "prerequisite" },
{ "from": "1", "to": "3", "meaning": "prerequisite" },
{ "from": "2", "to": "4", "meaning": "prerequisite" },
{ "from": "2", "to": "5", "meaning": "prerequisite" },
{ "from": "2", "to": "6", "meaning": "prerequisite" },
{ "from": "5", "to": "7", "meaning": "prerequisite" },
{ "from": "6", "to": "7", "meaning": "prerequisite" },
{ "from": "7", "to": "8", "meaning": "prerequisite" },
{ "from": "7", "to": "9", "meaning": "prerequisite" },
{ "from": "7", "to": "10", "meaning": "prerequisite" },
{ "from": "10", "to": "11", "meaning": "prerequisite" },
{ "from": "10", "to": "12", "meaning": "prerequisite" },
{ "from": "9", "to": "13", "meaning": "prerequisite" },
{ "from": "9", "to": "16", "meaning": "prerequisite" },
{ "from": "9", "to": "17", "meaning": "prerequisite" },
{ "from": "11", "to": "14", "meaning": "prerequisite" },
{ "from": "11", "to": "17", "meaning": "prerequisite" },
{ "from": "11", "to": "18", "meaning": "prerequisite" },
{ "from": "12", "to": "14", "meaning": "prerequisite" },
{ "from": "12", "to": "15", "meaning": "prerequisite" },
{ "from": "15", "to": "18", "meaning": "prerequisite" }
]
}
+55
View File
@@ -0,0 +1,55 @@
// NeetCode Roadmap — Topic Dependency Graph
// Source: https://neetcode.io/roadmap (chunk 7669)
// Extracted: 2026-06-01
//
// Render: dot -Tpng neetcode-roadmap.dot -o neetcode-roadmap.png
// or: dot -Tsvg neetcode-roadmap.dot -o neetcode-roadmap.svg
digraph NeetCodeRoadmap {
// Graph-wide defaults: ranks flow top to bottom; every node is a rounded,
// filled box with white text; edges are grey with slightly smaller arrowheads.
rankdir=TB;
node [shape=box, style="rounded,filled", fillcolor="#3f4bd1", fontcolor=white, fontname="Helvetica"];
edge [color="#555555", arrowsize=0.8];
// Nodes
// One node per roadmap topic. The quoted number is the node id used by the
// edge statements below; "\n" inside a label is a line break.
"1" [label="Arrays &\nHashing"];
"2" [label="Two Pointers"];
"3" [label="Stack"];
"4" [label="Sliding Window"];
"5" [label="Linked List"];
"6" [label="Binary Search"];
"7" [label="Trees"];
"8" [label="Tries"];
"9" [label="Heap /\nPriority Queue"];
"10" [label="Backtracking"];
"11" [label="Graphs"];
"12" [label="1-D Dynamic\nProgramming"];
"13" [label="Intervals"];
"14" [label="2-D Dynamic\nProgramming"];
"15" [label="Bit Manipulation"];
"16" [label="Greedy"];
"17" [label="Advanced Graphs"];
"18" [label="Math &\nGeometry"];
// Edges (parentId → node means "parentId is a prerequisite of node")
// A topic with several incoming edges (e.g. "7", "14", "17", "18") has
// more than one prerequisite.
"1" -> "2";
"1" -> "3";
"2" -> "4";
"2" -> "5";
"2" -> "6";
"5" -> "7";
"6" -> "7";
"7" -> "8";
"7" -> "9";
"7" -> "10";
"10" -> "11";
"10" -> "12";
"9" -> "13";
"9" -> "16";
"9" -> "17";
"11" -> "14";
"11" -> "17";
"11" -> "18";
"12" -> "14";
"12" -> "15";
"15" -> "18";
}
File diff suppressed because it is too large Load Diff
+46
View File
@@ -0,0 +1,46 @@
digraph NeetCodeRoadmap {
rankdir=TB;
node [shape=box, style="rounded,filled", fillcolor="#3f4bd1", fontcolor=white, fontname="Helvetica"];
edge [color="#555555", arrowsize=0.8];
"1" [label="Arrays\n&\nHashing"];
"2" [label="Two\nPointers"];
"6" [label="Binary\nSearch"];
"3" [label="Stack"];
"4" [label="Sliding\nWindow"];
"5" [label="Linked\nList"];
"7" [label="Trees"];
"8" [label="Tries"];
"9" [label="Heap\nPriority\nQueue"];
"10" [label="Backtracking"];
"11" [label="Graphs"];
"12" [label="1-D\nDynamic\nProgramming"];
"13" [label="Intervals"];
"16" [label="Greedy"];
"17" [label="Advanced\nGraphs"];
"18" [label="Math\n&\nGeometry"];
"14" [label="2-D\nDynamic\nProgramming"];
"15" [label="Bit\nManipulation"];
"1" -> "2";
"2" -> "6";
"1" -> "3";
"2" -> "4";
"2" -> "5";
"5" -> "7";
"6" -> "7";
"7" -> "8";
"7" -> "9";
"7" -> "10";
"10" -> "11";
"10" -> "12";
"9" -> "13";
"9" -> "16";
"9" -> "17";
"11" -> "17";
"11" -> "18";
"15" -> "18";
"11" -> "14";
"12" -> "14";
"12" -> "15";
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+21 -21
View File
@@ -695,18 +695,18 @@ Time: O(n * 32), Space: O(32) per step
What is the master keyword-to-algorithm mapping for subarray problems? What is the master keyword-to-algorithm mapping for subarray problems?
** Back ** Back
| Problem Phrase | Array Property | Algorithm | | Problem Phrase | Array Property | Algorithm |
|---------------|---------------|-----------| |--------------------------------------+-----------------------+----------------------------------------------|
| "Continuous subarray + Sum = K" | Only positive numbers | **Sliding Window** (O(1) space) | | "Continuous subarray + Sum = K" | Only positive numbers | **Sliding Window** (O(1) space) |
| "Continuous subarray + Sum = K" | Positive & negative | **Prefix Sum + Hash Map** (O(n) space) | | "Continuous subarray + Sum = K" | Positive & negative | **Prefix Sum + Hash Map** (O(n) space) |
| "Divisible by K" or "Multiple of X" | Any numbers | **Prefix Remainder + Hash Map** (sum % K) | | "Divisible by K" or "Multiple of X" | Any numbers | **Prefix Remainder + Hash Map** (sum % K) |
| "Equal number of X and Y" | Any numbers | **Value Mapping** (X→1, Y→-1) + Prefix Sum Map | | "Equal number of X and Y" | Any numbers | **Value Mapping** (X→1, Y→-1) + Prefix Sum Map |
| "Maximum / Minimum Sum" | Any numbers | **Kadane's Algorithm** (DP) | | "Maximum / Minimum Sum" | Any numbers | **Kadane's Algorithm** (DP) |
| "Subarray Sum + Frequent Updates" | Element mutations | **Fenwick Tree / Segment Tree** | | "Subarray Sum + Frequent Updates" | Element mutations | **Fenwick Tree / Segment Tree** |
| "Subarray product = K" | No zeros | **Prefix Product** (division) | | "Subarray product = K" | No zeros | **Prefix Product** (division) |
| "Subarray product positive/negative" | Any numbers | **Parity tracking** of negative count | | "Subarray product positive/negative" | Any numbers | **Parity tracking** of negative count |
| "Subarray XOR = K" | Any numbers | **Prefix XOR + Hash Map** | | "Subarray XOR = K" | Any numbers | **Prefix XOR + Hash Map** |
| "Subarray OR / AND" | Any numbers | **Set of results** (bounded by 32 changes) | | "Subarray OR / AND" | Any numbers | **Set of results** (bounded by 32 changes) |
* Subarray Sum — Modular Arithmetic Insight [algorithm:interview] * Subarray Sum — Modular Arithmetic Insight [algorithm:interview]
:PROPERTIES: :PROPERTIES:
@@ -747,15 +747,15 @@ The core philosophy of prefix sums is: **accumulate history as you traverse line
This generalizes far beyond addition: This generalizes far beyond addition:
| Problem | Mapping | Reduces To | | Problem | Mapping | Reduces To |
|---------|---------|-----------| |-------------------------+--------------------------+----------------------------|
| Equal 0s and 1s | 0→-1, 1→+1 | Subarray sum = 0 | | Equal 0s and 1s | 0→-1, 1→+1 | Subarray sum = 0 |
| Equal odd/even | even→+1, odd→-1 | Subarray sum = 0 | | Equal odd/even | even→+1, odd→-1 | Subarray sum = 0 |
| Equal vowels/consonants | vowel→+1, consonant→-1 | Subarray sum = 0 | | Equal vowels/consonants | vowel→+1, consonant→-1 | Subarray sum = 0 |
| Equal A/B/C counts | Track (c_A-c_B, c_B-c_C) | Prefix state tuple repeats | | Equal A/B/C counts | Track (c_A-c_B, c_B-c_C) | Prefix state tuple repeats |
| Subarray product | Prefix product | Division (handle zeros) | | Subarray product | Prefix product | Division (handle zeros) |
| Subarray XOR | Prefix XOR | XOR is invertible | | Subarray XOR | Prefix XOR | XOR is invertible |
| Subarray sum | Prefix sum | Subtraction | | Subarray sum | Prefix sum | Subtraction |
The pattern: The pattern:
1. Define a state that accumulates as you traverse 1. Define a state that accumulates as you traverse