/**
 * NeetCode Roadmap Extractor
 *
 * Fetches the NeetCode roadmap data (dependency graph + problems)
 * from the live site and outputs structured JSON, DOT, and org-mode.
 *
 * Deterministic for a given set of downloaded inputs, except that the JSON
 * (`extracted`) and org (`#+DATE`) outputs embed the current UTC date.
 *
 * Usage:
 *   node extract.mjs                   # write to ./out/
 *   node extract.mjs --stdout          # print JSON to stdout
 *   node extract.mjs --cache /tmp/nc   # cache downloads in dir
 */

import { writeFileSync, mkdirSync, readFileSync, existsSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";

const __dirname = dirname(fileURLToPath(import.meta.url));

// ── Config ──────────────────────────────────────────────────────────────────

const BASE = "https://neetcode.io";
const ROADMAP_CHUNK_ID = 8998; // exports ROADMAP_ROUTES
const GRAPH_DATA_CHUNK_ID = 7669; // contains the actual graph nodes
const LEETCODE_BASE = "https://leetcode.com/problems/";
const GITHUB_SOLUTIONS = "https://github.com/neetcode-gh/leetcode/blob/main/";

const args = process.argv.slice(2);
const stdoutMode = args.includes("--stdout");
const cacheDir = resolveCacheDir(args, join(__dirname, ".cache"));
const outDir = join(__dirname, "out");

/**
 * Resolve the download cache directory from the command-line arguments.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @param {string} fallbackDir - Directory used when `--cache` is not given.
 * @returns {string} The value following `--cache`, or `fallbackDir`.
 * @throws {Error} When `--cache` is the last argument or is followed by
 *   another `--flag`; previously this surfaced later as an opaque TypeError
 *   from `path.join`, or silently used the flag text as the directory.
 */
function resolveCacheDir(argv, fallbackDir) {
  const flagIndex = argv.indexOf("--cache");
  if (flagIndex === -1) return fallbackDir;
  const value = argv[flagIndex + 1];
  if (value === undefined || value.startsWith("--")) {
    throw new Error("--cache requires a directory argument");
  }
  return value;
}

// ── Fetch with optional disk cache ──────────────────────────────────────────

/**
 * Return the body of `url` as text, using `cacheDir` as a disk cache.
 *
 * @param {string} url - Resource to download on a cache miss.
 * @param {string} cacheKey - File name (relative to `cacheDir`) of the cached copy.
 * @returns {Promise<string>} The cached or freshly downloaded text.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function fetchText(url, cacheKey) {
  const cachePath = join(cacheDir, cacheKey);
  if (existsSync(cachePath)) {
    return readFileSync(cachePath, "utf8");
  }
  const res = await fetch(url);
  if (!res.ok) throw new Error(`Failed to fetch ${url}: ${res.status}`);
  const text = await res.text();
  mkdirSync(dirname(cachePath), { recursive: true });
  writeFileSync(cachePath, text, "utf8");
  return text;
}

// ── Step 1: Get chunk hashes from runtime ───────────────────────────────────

/**
 * Look up the content hash recorded for `chunkId` in the runtime script.
 *
 * @param {string} runtimeSrc - Source text of the runtime bundle.
 * @param {number|string} chunkId - Numeric chunk identifier.
 * @returns {string} The hex hash that follows `<chunkId>:"`.
 * @throws {Error} When no entry for the chunk is present.
 */
function findChunkHash(runtimeSrc, chunkId) {
  // \b keeps a short id (8998) from matching the tail of a longer one (18998).
  const m = runtimeSrc.match(new RegExp(`\\b${chunkId}:"([a-f0-9]+)"`));
  if (!m) throw new Error(`Could not find hash for chunk ${chunkId}`);
  return m[1];
}

/**
 * Discover the hashed file names of the bundles this script reads.
 *
 * Loads the roadmap page, locates the runtime script referenced by it, and
 * reads the hashes of the roadmap and graph-data chunks from that runtime.
 *
 * @returns {Promise<Object>} Map of chunk id -> hash, plus `main` holding the
 *   main bundle's file name.
 * @throws {Error} When the runtime/main script names or a chunk hash cannot be found.
 */
async function getChunkHashes() {
  const html = await fetchText(`${BASE}/roadmap`, "roadmap.html");

  const runtimeMatch = html.match(/src="(runtime\.[a-f0-9]+\.js)"/);
  if (!runtimeMatch) throw new Error("Could not find runtime JS filename");
  const runtimeName = runtimeMatch[1];
  const runtime = await fetchText(`${BASE}/${runtimeName}`, runtimeName);

  const hashes = {};
  for (const id of [ROADMAP_CHUNK_ID, GRAPH_DATA_CHUNK_ID]) {
    hashes[id] = findChunkHash(runtime, id);
  }

  const mainMatch = html.match(/src="(main\.[a-f0-9]+\.js)"/);
  if (!mainMatch) throw new Error("Could not find main JS filename");
  hashes.main = mainMatch[1];
  return hashes;
}

// ── Step 2: Extract graph nodes from chunk 7669 ─────────────────────────────

/**
 * Parse roadmap graph nodes out of the minified graph-data chunk.
 *
 * @param {string} chunkSrc - Source text of the graph-data chunk.
 * @returns {{id: string, name: string, prerequisites: string[]}[]} Nodes in
 *   source order; `prerequisites` holds the ids listed in `parentId`.
 */
function extractGraphNodes(chunkSrc) {
  const nodes = [];
  const re =
    /\{id:"(\d+)",name:"([^"]+)",backgroundColor:"([^"]+)"(?:,parentId:\[([^\]]*)\])?\}/g;
  let m;
  while ((m = re.exec(chunkSrc))) {
    const [, id, name, , parentStr] = m;
    const parents = parentStr
      ? parentStr
          .split(",")
          .map((s) => s.replace(/"/g, "").trim())
          .filter(Boolean)
      : [];
    nodes.push({ id, name, prerequisites: parents });
  }
  return nodes;
}

// ── Step 3: Extract problems from main bundle ───────────────────────────────

/**
 * Find the `}` that closes the object literal opening at `openIdx`.
 *
 * Braces inside quoted strings are ignored and nested objects are balanced.
 * The scan is capped so a malformed bundle cannot make every lookup walk the
 * rest of the file.
 *
 * @param {string} src - Text being scanned.
 * @param {number} openIdx - Index of the opening `{`.
 * @returns {number} Index of the matching `}`, or -1 when none is found in range.
 */
function findObjectEnd(src, openIdx) {
  const MAX_OBJECT_SCAN = 4096;
  const limit = Math.min(src.length, openIdx + MAX_OBJECT_SCAN);
  let depth = 0;
  let quote = null;
  for (let i = openIdx; i < limit; i += 1) {
    const ch = src[i];
    if (quote !== null) {
      if (ch === "\\") {
        i += 1; // skip the escaped character
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Parse problem records out of the minified main bundle.
 *
 * List-membership flags (`neetcode150`, `blind75`, `neetcode250`, `premium`)
 * are read only from the remainder of the matched object, so flags belonging
 * to the preceding or following problem are never attributed to this one.
 *
 * @param {string} mainSrc - Source text of the main bundle.
 * @returns {Object[]} Problems in source order with `name`, `pattern`,
 *   `difficulty`, `code`, `link`, optional `video`, and any flags set to `true`.
 */
function extractProblems(mainSrc) {
  const problems = [];
  const re =
    /\{problem:"([^"]+)",pattern:"([^"]+)",link:"([^"]+)",video:"([^"]*)",difficulty:"(\w+)",code:"([^"]+)"/g;
  let m;
  while ((m = re.exec(mainSrc))) {
    const [, name, pattern, link, video, difficulty, code] = m;
    const obj = { name, pattern, difficulty, code, link };
    if (video) obj.video = video;

    const matchEnd = m.index + m[0].length;
    const closeIdx = findObjectEnd(mainSrc, m.index);
    // Fall back to a bounded forward window when the object end is not found.
    const tailEnd =
      closeIdx === -1 ? Math.min(mainSrc.length, matchEnd + 200) : closeIdx;
    const tail = mainSrc.slice(matchEnd, tailEnd);

    if (/\bneetcode150:!0/.test(tail)) obj.neetcode150 = true;
    if (/\bblind75:!0/.test(tail)) obj.blind75 = true;
    if (/\bneetcode250:!0/.test(tail)) obj.neetcode250 = true;
    if (/\bpremium:!0/.test(tail)) obj.premium = true;
    problems.push(obj);
  }
  return problems;
}

// ── Step 4: Extract course links from chunk 7669 ────────────────────────────

/**
 * Parse the per-topic course link lists out of the graph-data chunk.
 *
 * @param {string} chunkSrc - Source text of the graph-data chunk.
 * @returns {Object} Map of topic name -> array of `{course, name, routerLink}`.
 */
function extractCourses(chunkSrc) {
  const courses = {};
  const re =
    /"([^"]+)":\[\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g;
  let m;
  while ((m = re.exec(chunkSrc))) {
    const topic = m[1];
    // Skip the opening quote, the topic text and the closing quote.
    const arrStart = m.index + topic.length + 2;
    const arrEnd = chunkSrc.indexOf("]", arrStart);
    const arrStr = chunkSrc.slice(arrStart, arrEnd + 1);
    const items = [];
    const itemRe = /\{course:"([^"]+)",name:"([^"]+)",routerLink:"([^"]+)"\}/g;
    let im;
    while ((im = itemRe.exec(arrStr))) {
      items.push({ course: im[1], name: im[2], routerLink: im[3] });
    }
    if (items.length) courses[topic] = items;
  }
  return courses;
}

// ── Topological sort ────────────────────────────────────────────────────────

/**
 * Order nodes so that every node appears after its prerequisites.
 *
 * Depth-first; each id is visited at most once, and prerequisite ids that do
 * not correspond to a node are skipped.
 *
 * @param {{id: string, prerequisites: string[]}[]} nodes - Graph nodes.
 * @returns {Object[]} The same node objects in dependency order.
 */
function topoSort(nodes) {
  const byId = Object.fromEntries(nodes.map((n) => [n.id, n]));
  const visited = new Set();
  const result = [];

  function visit(id) {
    if (visited.has(id)) return;
    visited.add(id);
    const node = byId[id];
    if (!node) return;
    for (const p of node.prerequisites) visit(p);
    result.push(node);
  }

  for (const n of nodes) visit(n.id);
  return result;
}

// ── Build DOT graph ─────────────────────────────────────────────────────────

/**
 * Render the roadmap as a Graphviz DOT digraph.
 *
 * @param {{id: string, name: string, prerequisites: string[]}[]} nodes - Graph nodes.
 * @returns {string} DOT source with one node statement per topic and one
 *   edge per prerequisite, terminated by a newline.
 */
function buildDot(nodes) {
  const lines = [
    "digraph NeetCodeRoadmap {",
    ' rankdir=TB;',
    ' node [shape=box, style="rounded,filled", fillcolor="#3f4bd1", fontcolor=white, fontname="Helvetica"];',
    ' edge [color="#555555", arrowsize=0.8];',
    "",
  ];
  for (const n of nodes) {
    // Every space (and " / " separator) becomes a DOT line break in the label.
    const label = n.name.replace(/ \/ /g, "\\n").replace(/ /g, "\\n");
    lines.push(` "${n.id}" [label="${label}"];`);
  }
  lines.push("");
  for (const n of nodes) {
    for (const p of n.prerequisites) {
      lines.push(` "${p}" -> "${n.id}";`);
    }
  }
  lines.push("}");
  return lines.join("\n") + "\n";
}

// ── Build org-mode file ─────────────────────────────────────────────────────

/**
 * Render the roadmap as an org-mode study checklist.
 *
 * Only problems flagged `neetcode150` are listed under each topic.
 *
 * @param {{name: string}[]} sortedNodes - Topics in the order they should appear.
 * @param {Object} problemsByTopic - Map of topic name -> problem records.
 * @returns {string} Org-mode document text; its `#+DATE` is today's UTC date.
 */
function buildOrg(sortedNodes, problemsByTopic) {
  const lines = [];
  const now = new Date().toISOString().slice(0, 10);

  lines.push("#+TITLE: NeetCode Roadmap");
  lines.push("#+PROPERTY: STUDY_DECK_02");
  lines.push(`#+DATE: ${now}`);
  lines.push("#+TODO: TODO DONE");
  lines.push("#+STARTUP: overview");
  lines.push("");
  lines.push("Source: [[https://neetcode.io/roadmap][neetcode.io/roadmap]]");
  lines.push("");

  // Anything that is not "Easy" or "Medium" is tagged as hard.
  const difficultyTag = (d) =>
    d === "Easy" ? "easy" : d === "Medium" ? "medium" : "hard";
  const topicSlug = (name) =>
    name
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, "-")
      .replace(/(^-|-$)/g, "");
  const notesRoot = "dsa";

  for (const node of sortedNodes) {
    const topicProblems = (problemsByTopic[node.name] || []).filter(
      (p) => p.neetcode150
    );
    const slug = topicSlug(node.name);

    lines.push(`* TODO ${node.name} [/]`);
    lines.push("");

    if (topicProblems.length === 0) {
      lines.push(" (no NeetCode 150 problems)");
      lines.push("");
      continue;
    }

    for (const p of topicProblems) {
      const tag = difficultyTag(p.difficulty);
      const lcUrl = `${LEETCODE_BASE}${p.link}`;
      const num = p.code.split("-")[0];
      const notesFile = `${notesRoot}/${slug}/${p.code}.org`;

      lines.push(`** TODO ${num}. ${p.name} :${tag}:`);
      lines.push(`:PROPERTIES:`);
      lines.push(`:LEETCODE: [[${lcUrl}][Problem]]`);
      lines.push(`:CPP: [[${GITHUB_SOLUTIONS}cpp/${p.code}.cpp][Solution]]`);
      lines.push(`:PYTHON: [[${GITHUB_SOLUTIONS}python/${p.code}.py][Solution]]`);
      if (p.video) {
        lines.push(`:VIDEO: [[https://youtube.com/watch?v=${p.video}][Watch]]`);
      }
      lines.push(`:END:`);
      lines.push("");
      lines.push(`*** TODO Python`);
      lines.push(`*** TODO C++`);
      lines.push(`Notes: [[file:${notesFile}]]`);
    }
    lines.push("");
  }

  return lines.join("\n");
}

// ── Main ────────────────────────────────────────────────────────────────────

/**
 * Group problem records by their `pattern` (topic name), preserving order.
 *
 * @param {{pattern: string}[]} problems - Problem records.
 * @returns {Object} Map of pattern -> array of problems.
 */
function groupByPattern(problems) {
  const grouped = {};
  for (const p of problems) {
    if (!grouped[p.pattern]) grouped[p.pattern] = [];
    grouped[p.pattern].push(p);
  }
  return grouped;
}

/**
 * Download the bundles, extract the roadmap, and emit the outputs.
 *
 * With `--stdout` the full JSON is printed; otherwise the JSON, DOT and
 * org-mode files are written to disk and a summary is logged.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const hashes = await getChunkHashes();

  const graphChunkName = `${GRAPH_DATA_CHUNK_ID}.${hashes[GRAPH_DATA_CHUNK_ID]}.js`;
  const [graphChunk, mainSrc] = await Promise.all([
    fetchText(`${BASE}/${graphChunkName}`, graphChunkName),
    fetchText(`${BASE}/${hashes.main}`, hashes.main),
  ]);

  const nodes = extractGraphNodes(graphChunk);
  const problems = extractProblems(mainSrc);
  const courses = extractCourses(graphChunk);

  // Build edges from prerequisites
  const edges = [];
  for (const n of nodes) {
    for (const p of n.prerequisites) {
      edges.push({ from: p, to: n.id, meaning: "prerequisite" });
    }
  }

  // Group problems by topic
  const problemsByTopic = groupByPattern(problems);

  // NeetCode 150 only
  const nc150Problems = problems.filter((p) => p.neetcode150);
  const nc150ByTopic = groupByPattern(nc150Problems);

  // Topological sort for org output
  const sorted = topoSort(nodes);

  const result = {
    source: "https://neetcode.io/roadmap",
    extracted: new Date().toISOString().slice(0, 10),
    graph: { nodes, edges },
    problemsByTopic,
    coursesByTopic: courses,
    stats: {
      topics: nodes.length,
      edges: edges.length,
      totalProblems: problems.length,
      neetcode150: nc150Problems.length,
    },
  };

  if (stdoutMode) {
    process.stdout.write(JSON.stringify(result, null, 2) + "\n");
  } else {
    mkdirSync(outDir, { recursive: true });

    // Full data
    writeFileSync(
      join(outDir, "roadmap.json"),
      JSON.stringify(result, null, 2) + "\n",
      "utf8"
    );

    // NeetCode 150 only
    const nc150Result = {
      source: result.source,
      extracted: result.extracted,
      graph: result.graph,
      problemsByTopic: nc150ByTopic,
      coursesByTopic: courses,
      stats: {
        topics: nodes.length,
        edges: edges.length,
        problems: nc150Problems.length,
      },
    };
    writeFileSync(
      join(outDir, "roadmap-neetcode150.json"),
      JSON.stringify(nc150Result, null, 2) + "\n",
      "utf8"
    );

    // DOT
    writeFileSync(join(outDir, "roadmap.dot"), buildDot(nodes), "utf8");

    // Org-mode — write to org/study_deck_02/roadmap.org
    const orgDir = join(__dirname, "../org/study_deck_02");
    mkdirSync(orgDir, { recursive: true });
    writeFileSync(
      join(orgDir, "roadmap.org"),
      buildOrg(sorted, problemsByTopic),
      "utf8"
    );

    console.log(`Wrote ${outDir}/roadmap.json (${result.stats.totalProblems} problems total)`);
    console.log(`Wrote ${outDir}/roadmap-neetcode150.json (${result.stats.neetcode150} problems)`);
    console.log(`Wrote ${outDir}/roadmap.dot`);
    console.log(`Wrote ${join(orgDir, "roadmap.org")}`);
    console.log(
      ` ${result.stats.topics} topics, ${result.stats.edges} edges`
    );
  }
}
// Entry point: run the extractor; on failure, log the error and exit non-zero.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});