feat: thread-safe FlowStore with signature dedup and samples
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,15 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from auto_reverse.models import EndpointRecord, Signature, status_class
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
from auto_reverse.models import CapturedFlow
|
from auto_reverse.models import CapturedFlow
|
||||||
|
|
||||||
_UUID = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
|
_UUID = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
|
||||||
@@ -62,3 +68,80 @@ class ScopeFilter:
|
|||||||
return False
|
return False
|
||||||
ctype = flow.resp_headers.get("content-type", "").lower()
|
ctype = flow.resp_headers.get("content-type", "").lower()
|
||||||
return not ctype.startswith(("text/css", "image/", "font/", "application/javascript"))
|
return not ctype.startswith(("text/css", "image/", "font/", "application/javascript"))
|
||||||
|
|
||||||
|
|
||||||
|
MAX_SAMPLES = 5
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class IngestResult:
|
||||||
|
in_scope: bool
|
||||||
|
is_new: bool
|
||||||
|
signature: Signature | None
|
||||||
|
|
||||||
|
|
||||||
|
class FlowStore:
|
||||||
|
"""Thread-safe store: dedup by signature, retain bounded samples per endpoint."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
scope: ScopeFilter,
|
||||||
|
on_new_signature: Callable[[Signature], None] | None = None,
|
||||||
|
) -> None:
|
||||||
|
self._scope = scope
|
||||||
|
self._on_new = on_new_signature
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._records: dict[Signature, EndpointRecord] = {}
|
||||||
|
self._samples: dict[Signature, list[CapturedFlow]] = {}
|
||||||
|
|
||||||
|
def signature_of(self, flow: CapturedFlow) -> Signature:
|
||||||
|
return Signature(
|
||||||
|
method=flow.method.upper(),
|
||||||
|
host=flow.host,
|
||||||
|
path_template=path_template(flow.path),
|
||||||
|
status_class=status_class(flow.status),
|
||||||
|
)
|
||||||
|
|
||||||
|
def ingest(self, flow: CapturedFlow) -> IngestResult:
|
||||||
|
if not self._scope.is_in_scope(flow):
|
||||||
|
return IngestResult(in_scope=False, is_new=False, signature=None)
|
||||||
|
sig = self.signature_of(flow)
|
||||||
|
with self._lock:
|
||||||
|
is_new = sig not in self._records
|
||||||
|
if is_new:
|
||||||
|
self._records[sig] = EndpointRecord(signature=sig)
|
||||||
|
self._samples[sig] = []
|
||||||
|
record = self._records[sig]
|
||||||
|
record.sample_count += 1
|
||||||
|
record.query_params.update(self._query_keys(flow))
|
||||||
|
samples = self._samples[sig]
|
||||||
|
if len(samples) < MAX_SAMPLES:
|
||||||
|
samples.append(flow)
|
||||||
|
if is_new and self._on_new is not None:
|
||||||
|
self._on_new(sig)
|
||||||
|
return IngestResult(in_scope=True, is_new=is_new, signature=sig)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _query_keys(flow: CapturedFlow) -> set[str]:
|
||||||
|
return set(flow.query.keys())
|
||||||
|
|
||||||
|
def endpoints(self) -> list[EndpointRecord]:
|
||||||
|
with self._lock:
|
||||||
|
return list(self._records.values())
|
||||||
|
|
||||||
|
def samples(self, sig: Signature) -> list[CapturedFlow]:
|
||||||
|
with self._lock:
|
||||||
|
return list(self._samples.get(sig, []))
|
||||||
|
|
||||||
|
def get(self, sig: Signature) -> EndpointRecord | None:
|
||||||
|
with self._lock:
|
||||||
|
return self._records.get(sig)
|
||||||
|
|
||||||
|
def search(self, query: str) -> list[EndpointRecord]:
|
||||||
|
q = query.lower()
|
||||||
|
with self._lock:
|
||||||
|
return [
|
||||||
|
r for r in self._records.values()
|
||||||
|
if q in r.signature.path_template.lower()
|
||||||
|
or q in r.signature.method.lower()
|
||||||
|
]
|
||||||
|
|||||||
+45
-1
@@ -1,4 +1,5 @@
|
|||||||
from auto_reverse.store import path_template
|
from auto_reverse.models import CapturedFlow, Signature
|
||||||
|
from auto_reverse.store import FlowStore, ScopeFilter, path_template
|
||||||
|
|
||||||
|
|
||||||
def test_collapses_numeric_ids():
|
def test_collapses_numeric_ids():
|
||||||
@@ -21,3 +22,46 @@ def test_keeps_short_words():
|
|||||||
def test_root_and_empty():
|
def test_root_and_empty():
|
||||||
assert path_template("/") == "/"
|
assert path_template("/") == "/"
|
||||||
assert path_template("") == "/"
|
assert path_template("") == "/"
|
||||||
|
|
||||||
|
|
||||||
|
def _post(host: str, path: str, body: bytes) -> CapturedFlow:
|
||||||
|
return CapturedFlow(
|
||||||
|
method="POST", host=host, path=path, query={},
|
||||||
|
req_headers={"content-type": "application/json"}, req_body=body,
|
||||||
|
status=201, resp_headers={"content-type": "application/json"},
|
||||||
|
resp_body=b'{"ok": true}', timestamp=0.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_ingest_new_signature_returns_true_once():
|
||||||
|
store = FlowStore(ScopeFilter(target_hosts={"ex.com"}))
|
||||||
|
assert store.ingest(_post("ex.com", "/api/cart/1", b'{"q": 1}')).is_new is True
|
||||||
|
# same template, different id -> not new
|
||||||
|
assert store.ingest(_post("ex.com", "/api/cart/2", b'{"q": 2}')).is_new is False
|
||||||
|
assert len(store.endpoints()) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_out_of_scope_flow_ignored():
|
||||||
|
store = FlowStore(ScopeFilter(target_hosts={"ex.com"}))
|
||||||
|
result = store.ingest(_post("other.com", "/x", b"{}"))
|
||||||
|
assert result.is_new is False
|
||||||
|
assert result.in_scope is False
|
||||||
|
assert store.endpoints() == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_new_signature_callback_fires_with_signature():
|
||||||
|
seen: list[Signature] = []
|
||||||
|
store = FlowStore(ScopeFilter(target_hosts={"ex.com"}), on_new_signature=seen.append)
|
||||||
|
store.ingest(_post("ex.com", "/api/cart/1", b"{}"))
|
||||||
|
store.ingest(_post("ex.com", "/api/cart/2", b"{}"))
|
||||||
|
assert len(seen) == 1
|
||||||
|
assert seen[0].path_template == "/api/cart/{id}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_filters_by_substring():
|
||||||
|
store = FlowStore(ScopeFilter(target_hosts={"ex.com"}))
|
||||||
|
store.ingest(_post("ex.com", "/api/cart/1", b"{}"))
|
||||||
|
store.ingest(_post("ex.com", "/api/login", b"{}"))
|
||||||
|
results = store.search("cart")
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0].signature.path_template == "/api/cart/{id}"
|
||||||
|
|||||||
Reference in New Issue
Block a user