feat: deterministic JSON schema inference (genson wrapper)

This commit is contained in:
2026-05-31 23:57:47 +08:00
parent 7956b49e28
commit 5bc7e5b89d
3 changed files with 54 additions and 0 deletions
View File
+24
View File
@@ -0,0 +1,24 @@
from __future__ import annotations
from typing import Any
from genson import SchemaBuilder
class SchemaAccumulator:
"""Accumulate JSON samples into a widening JSON Schema (genson-backed)."""
def __init__(self) -> None:
self._builder = SchemaBuilder()
self._count = 0
def add(self, value: Any) -> None:
self._builder.add_object(value)
self._count += 1
def schema(self) -> dict[str, Any] | None:
if self._count == 0:
return None
result: dict[str, Any] = self._builder.to_schema()
result.pop("$schema", None)
return result
+30
View File
@@ -0,0 +1,30 @@
from auto_reverse.doc.schema import SchemaAccumulator
def test_single_object_schema():
    """One object sample should give an object schema listing both of its keys."""
    accumulator = SchemaAccumulator()
    accumulator.add({"id": 1, "name": "Ada"})
    inferred = accumulator.schema()
    assert inferred["type"] == "object"
    property_names = set(inferred["properties"])
    assert property_names == {"id", "name"}
def test_merge_widens_optional_fields():
    """A key missing from a later sample should no longer be listed as required."""
    accumulator = SchemaAccumulator()
    # The second sample omits "name", so only "id" appears in every sample.
    for sample in ({"id": 1, "name": "Ada"}, {"id": 2}):
        accumulator.add(sample)
    required = accumulator.schema().get("required", [])
    assert "id" in required
    assert "name" not in required
def test_array_schema():
    """A list-of-objects sample should give an array schema with object items."""
    accumulator = SchemaAccumulator()
    accumulator.add([{"id": 1}, {"id": 2}])
    inferred = accumulator.schema()
    assert inferred["type"] == "array"
    item_schema = inferred["items"]
    assert item_schema["type"] == "object"
def test_empty_accumulator_returns_none():
    """An accumulator that never received a sample should report no schema."""
    untouched = SchemaAccumulator()
    assert untouched.schema() is None