feat: thread-safe FlowStore with signature dedup and samples

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 23:56:42 +08:00
parent a82d99b12a
commit 7956b49e28
2 changed files with 128 additions and 1 deletions
+83
View File
@@ -1,9 +1,15 @@
from __future__ import annotations
import re
import threading
from dataclasses import dataclass
from typing import TYPE_CHECKING
from auto_reverse.models import EndpointRecord, Signature, status_class
if TYPE_CHECKING:
from collections.abc import Callable
from auto_reverse.models import CapturedFlow
_UUID = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
@@ -62,3 +68,80 @@ class ScopeFilter:
return False
ctype = flow.resp_headers.get("content-type", "").lower()
return not ctype.startswith(("text/css", "image/", "font/", "application/javascript"))
MAX_SAMPLES = 5
@dataclass
class IngestResult:
in_scope: bool
is_new: bool
signature: Signature | None
class FlowStore:
"""Thread-safe store: dedup by signature, retain bounded samples per endpoint."""
def __init__(
self,
scope: ScopeFilter,
on_new_signature: Callable[[Signature], None] | None = None,
) -> None:
self._scope = scope
self._on_new = on_new_signature
self._lock = threading.Lock()
self._records: dict[Signature, EndpointRecord] = {}
self._samples: dict[Signature, list[CapturedFlow]] = {}
def signature_of(self, flow: CapturedFlow) -> Signature:
return Signature(
method=flow.method.upper(),
host=flow.host,
path_template=path_template(flow.path),
status_class=status_class(flow.status),
)
def ingest(self, flow: CapturedFlow) -> IngestResult:
if not self._scope.is_in_scope(flow):
return IngestResult(in_scope=False, is_new=False, signature=None)
sig = self.signature_of(flow)
with self._lock:
is_new = sig not in self._records
if is_new:
self._records[sig] = EndpointRecord(signature=sig)
self._samples[sig] = []
record = self._records[sig]
record.sample_count += 1
record.query_params.update(self._query_keys(flow))
samples = self._samples[sig]
if len(samples) < MAX_SAMPLES:
samples.append(flow)
if is_new and self._on_new is not None:
self._on_new(sig)
return IngestResult(in_scope=True, is_new=is_new, signature=sig)
@staticmethod
def _query_keys(flow: CapturedFlow) -> set[str]:
return set(flow.query.keys())
def endpoints(self) -> list[EndpointRecord]:
with self._lock:
return list(self._records.values())
def samples(self, sig: Signature) -> list[CapturedFlow]:
with self._lock:
return list(self._samples.get(sig, []))
def get(self, sig: Signature) -> EndpointRecord | None:
with self._lock:
return self._records.get(sig)
def search(self, query: str) -> list[EndpointRecord]:
q = query.lower()
with self._lock:
return [
r for r in self._records.values()
if q in r.signature.path_template.lower()
or q in r.signature.method.lower()
]