From 591e646a6c5dfbd635842ee64740570070eb02b4 Mon Sep 17 00:00:00 2001
From: Wong Ding Feng
Date: Mon, 1 Jun 2026 01:18:14 +0800
Subject: [PATCH] feat: REPL and CLI wiring; end-to-end capture-to-spec smoke test

---
Review notes: cli.run now releases the proxy and browser when start-up
fails, and Repl.handle only treats the exact word "/flows" as a command.
The commit id above and the blob ids on the index lines predate these
changes; regenerate the patch with git format-patch after applying it.

 src/auto_reverse/__init__.py |   5 +-
 src/auto_reverse/cli.py      | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/auto_reverse/repl.py     |  65 ++++++++++++++++++++++++++++++
 tests/test_cli.py            |  15 +++++++
 tests/test_e2e_smoke.py      |  57 +++++++++++++++++++++++++++
 tests/test_repl.py           |  37 +++++++++++++++++
 6 files changed, 304 insertions(+), 1 deletion(-)
 create mode 100644 src/auto_reverse/cli.py
 create mode 100644 src/auto_reverse/repl.py
 create mode 100644 tests/test_cli.py
 create mode 100644 tests/test_e2e_smoke.py
 create mode 100644 tests/test_repl.py

diff --git a/src/auto_reverse/__init__.py b/src/auto_reverse/__init__.py
index 1cefbbf..eaf95e4 100644
--- a/src/auto_reverse/__init__.py
+++ b/src/auto_reverse/__init__.py
@@ -1,2 +1,5 @@
+from auto_reverse.cli import run
+
+
 def main() -> None:
-    print("Hello from auto-reverse!")
+    raise SystemExit(run())
diff --git a/src/auto_reverse/cli.py b/src/auto_reverse/cli.py
new file mode 100644
index 0000000..7eed198
--- /dev/null
+++ b/src/auto_reverse/cli.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+import threading
+from contextlib import ExitStack
+from datetime import UTC as DT_UTC
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from anthropic import Anthropic
+
+from auto_reverse.agent import Agent
+from auto_reverse.browser import Browser
+from auto_reverse.config import Config
+from auto_reverse.doc.client import generate_client
+from auto_reverse.doc.engine import DocEngine
+from auto_reverse.proxy import ProxyServer
+from auto_reverse.repl import Repl
+from auto_reverse.store import FlowStore, ScopeFilter
+from auto_reverse.tools import build_registry
+
+if TYPE_CHECKING:
+    from auto_reverse.models import Signature
+
+SYSTEM_PROMPT = """\
+You are auto-reverse, an assistant that reverse-engineers a website's API.
+Drive the browser toward the user's stated intent using the browser_* tools.
+After actions, inspect captured traffic with flows_search and enrich notable
+endpoints with doc_document (give a short summary, description, and tag).
+Pursue the intent to a sensible depth, then summarize what you found and ask
+what to do next. Be concise.
+"""
+
+
+def _parse_args(argv: list[str]) -> Config:
+    """Translate command-line arguments into a ``Config``."""
+    p = argparse.ArgumentParser(prog="auto-reverse")
+    p.add_argument("target_url")
+    p.add_argument("--out")
+    p.add_argument("--proxy-port", type=int, default=8080)
+    p.add_argument("--headless", action="store_true")
+    p.add_argument("--profile")
+    p.add_argument("--gen-client", action="store_true")
+    p.add_argument("--model", default="claude-opus-4-8")
+    p.add_argument("--scope", default="")
+    p.add_argument("--no-llm-doc", action="store_true")
+    p.add_argument("--resume")
+    a = p.parse_args(argv)
+    return Config(
+        target_url=a.target_url,
+        out_dir=a.out,
+        proxy_port=a.proxy_port,
+        headless=a.headless,
+        profile=a.profile,
+        gen_client=a.gen_client,
+        model=a.model,
+        # --scope is a comma-separated host list; empty entries are dropped.
+        scope_hosts={h for h in a.scope.split(",") if h},
+        no_llm_doc=a.no_llm_doc,
+        resume=a.resume,
+    )
+
+
+def run(argv: list[str] | None = None) -> int:
+    """Run one interactive capture session and return the exit code.
+
+    Uses ``sys.argv[1:]`` when ``argv`` is ``None``. The proxy and the
+    browser are stopped on every exit path, including a failure or Ctrl-C
+    while the session is still starting up.
+    """
+    cfg = _parse_args(argv if argv is not None else sys.argv[1:])
+    out_dir = Path(
+        cfg.out_dir
+        or f"./auto-reverse-out/{cfg.target_host}-{datetime.now(DT_UTC):%Y%m%d-%H%M%S}"
+    )
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    scope = ScopeFilter(target_hosts=cfg.all_scope_hosts())
+    title = f"{cfg.target_host} API"
+
+    # The store takes its callback before the engine exists, so the engine
+    # is handed to the callback through this box once it has been built.
+    engine_box: dict[str, DocEngine] = {}
+
+    def on_new(sig: Signature) -> None:
+        engine = engine_box.get("engine")
+        if engine is not None:
+            threading.Thread(target=engine.document, args=(sig,), daemon=True).start()
+
+    store = FlowStore(scope, on_new_signature=on_new)
+    engine = DocEngine(store, out_dir=out_dir, title=title, use_llm=not cfg.no_llm_doc)
+    engine_box["engine"] = engine
+
+    proxy = ProxyServer(store, archive_path=out_dir / "archive.log", port=cfg.proxy_port)
+
+    with ExitStack() as cleanup:
+        proxy.start()
+        cleanup.callback(proxy.stop)
+
+        # Use the port reported by the started proxy rather than the requested one.
+        browser = Browser(proxy_port=proxy.port, headless=cfg.headless)
+        browser.start()
+        cleanup.callback(browser.stop)
+        browser.navigate(cfg.target_url)
+
+        if cfg.auth == "manual" and not cfg.headless:
+            input("Log in if needed, then press Enter to begin exploration... ")
+
+        client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
+        registry = build_registry(browser, store, engine)
+        agent = Agent(client, registry, model=cfg.model, system=SYSTEM_PROMPT)
+        repl = Repl(agent, store, spec_path=str(out_dir / "openapi.yaml"))
+
+        try:
+            repl.run()
+        finally:
+            # Stop the browser, then the proxy, before generating the client.
+            cleanup.close()
+            if cfg.gen_client:
+                ok = generate_client(out_dir / "openapi.yaml", out_dir / "client")
+                print("client generated" if ok else "client generation skipped/failed")
+    print(f"Outputs in {out_dir}")
+    return 0
diff --git a/src/auto_reverse/repl.py b/src/auto_reverse/repl.py
new file mode 100644
index 0000000..b5ab2ac
--- /dev/null
+++ b/src/auto_reverse/repl.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from auto_reverse.agent import Agent
+    from auto_reverse.store import FlowStore
+
+HELP = """\
+Commands:
+  (free text)  state intent in natural language (sent to the agent)
+  /flows [q]   list/search discovered endpoints (local)
+  /spec        show spec path + endpoint count
+  /help        this help
+  /quit        exit
+"""
+
+
+class Repl:
+    """Line-oriented front end: local slash commands plus agent turns."""
+
+    def __init__(self, agent: Agent, store: FlowStore, spec_path: str) -> None:
+        self._agent = agent
+        self._store = store
+        self._spec_path = spec_path
+
+    def handle(self, line: str) -> str | None:
+        """Process one input line.
+
+        Returns the text to print (an empty string for a blank line), or None
+        to signal quit. Input that is not a recognised command goes to the agent.
+        """
+        line = line.strip()
+        if not line:
+            return ""
+        if line in ("/quit", "/exit"):
+            return None
+        if line == "/help":
+            return HELP
+        if line == "/spec":
+            return f"{self._spec_path} — {len(self._store.endpoints())} endpoint(s)"
+        # Match the whole first word, so a message that merely starts with
+        # "/flows" (such as the path "/flows/123") still reaches the agent.
+        command, *rest = line.split(maxsplit=1)
+        if command == "/flows":
+            records = self._store.search(rest[0]) if rest else self._store.endpoints()
+            return "\n".join(
+                f"{r.signature.method} {r.signature.path_template}" for r in records
+            ) or "(no endpoints yet)"
+        return self._agent.run_turn(line)
+
+    def run(self) -> None:  # pragma: no cover - interactive loop
+        """Read lines from stdin until quit, EOF or Ctrl-C, printing replies."""
+        print(HELP)
+        while True:
+            try:
+                line = input("> ")
+            except (EOFError, KeyboardInterrupt):
+                print()
+                break
+            out = self.handle(line)
+            if out is None:
+                break
+            if out:
+                print(out)
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..4267ba9
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,15 @@
+from auto_reverse.cli import _parse_args
+
+
+def test_parse_minimal():
+    cfg = _parse_args(["https://app.example.com"])
+    assert cfg.target_url == "https://app.example.com"
+    assert cfg.model == "claude-opus-4-8"
+    assert cfg.headless is False
+
+
+def test_parse_scope_and_flags():
+    cfg = _parse_args(["https://x.com", "--scope", "a.com,b.com", "--headless", "--gen-client"])
+    assert cfg.scope_hosts == {"a.com", "b.com"}
+    assert cfg.headless is True
+    assert cfg.gen_client is True
diff --git a/tests/test_e2e_smoke.py b/tests/test_e2e_smoke.py
new file mode 100644
index 0000000..a8fe684
--- /dev/null
+++ b/tests/test_e2e_smoke.py
@@ -0,0 +1,57 @@
+from __future__ import annotations
+
+import threading
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+import pytest
+
+playwright = pytest.importorskip("playwright.sync_api")
+
+from auto_reverse.browser import Browser  # noqa: E402
+from auto_reverse.doc.engine import DocEngine  # noqa: E402
+from auto_reverse.proxy import ProxyServer  # noqa: E402
+from auto_reverse.store import FlowStore, ScopeFilter  # noqa: E402
+
+
+def test_capture_to_spec_end_to_end(tmp_path: Path, fixture_site: str):
+    from urllib.parse import urlsplit
+
+    host = urlsplit(fixture_site).hostname
+    port = urlsplit(fixture_site).port
+    scope = ScopeFilter(target_hosts={f"{host}:{port}", host})
+
+    engine_holder: dict = {}
+
+    def on_new(sig):
+        engine_holder["engine"].document(sig)
+
+    store = FlowStore(scope, on_new_signature=on_new)
+    engine = DocEngine(store, out_dir=tmp_path, title="fixture", use_llm=False)
+    engine_holder["engine"] = engine
+
+    proxy = ProxyServer(store, archive_path=tmp_path / "archive.log", port=0)
+    try:
+        proxy.start()
+    except Exception as exc:
+        pytest.skip(f"proxy unavailable: {exc}")
+
+    try:
+        browser = Browser(proxy_port=proxy.port, headless=True)
+        browser.start()
+    except Exception as exc:
+        proxy.stop()
+        pytest.skip(f"browser unavailable: {exc}")
+
+    try:
+        browser.navigate(fixture_site + "/")  # triggers fetch('/api/users')
+        # allow capture to settle
+        threading.Event().wait(1.0)
+        assert any(
+            "/api/users" in r.signature.path_template for r in store.endpoints()
+        )
+    finally:
+        browser.stop()
+        proxy.stop()
diff --git a/tests/test_repl.py b/tests/test_repl.py
new file mode 100644
index 0000000..831e468
--- /dev/null
+++ b/tests/test_repl.py
@@ -0,0 +1,37 @@
+from auto_reverse.models import CapturedFlow
+from auto_reverse.repl import Repl
+from auto_reverse.store import FlowStore, ScopeFilter
+
+
+class FakeAgent:
+    def run_turn(self, msg):
+        return f"agent saw: {msg}"
+
+
+def _store():
+    s = FlowStore(ScopeFilter(target_hosts={"ex.com"}))
+    s.ingest(CapturedFlow(
+        method="GET", host="ex.com", path="/api/users", query={}, req_headers={},
+        req_body=None, status=200, resp_headers={}, resp_body=None, timestamp=0.0,
+    ))
+    return s
+
+
+def test_quit_returns_none():
+    repl = Repl(FakeAgent(), _store(), "openapi.yaml")
+    assert repl.handle("/quit") is None
+
+
+def test_flows_lists_endpoints():
+    repl = Repl(FakeAgent(), _store(), "openapi.yaml")
+    assert "/api/users" in repl.handle("/flows")
+
+
+def test_plain_text_goes_to_agent():
+    repl = Repl(FakeAgent(), _store(), "openapi.yaml")
+    assert repl.handle("map users") == "agent saw: map users"
+
+
+def test_flows_prefix_is_not_the_flows_command():
+    repl = Repl(FakeAgent(), _store(), "openapi.yaml")
+    assert repl.handle("/flows/123 explain") == "agent saw: /flows/123 explain"