Files

68 lines
2.7 KiB
Python
Raw Permalink Normal View History

"""
Patch defeatbeta_api to read from local parquet files with zero network access.

Usage:
    from offline import enable_offline
    enable_offline("data/parquet")  # call once before the first Ticker()

    from defeatbeta_api.data.ticker import Ticker
    t = Ticker("AAPL")
    t.price()  # reads the local file, no HTTP

Note: the one-time welcome banner printed on first import calls
get_data_update_time() once. After that, nothing touches the network.
"""
from pathlib import Path
def enable_offline(parquet_dir: str = "data/parquet") -> None:
    """Monkey-patch defeatbeta_api so it reads local parquet files.

    Replaces, at class level, the hooks defeatbeta_api uses to locate and
    validate remote data so that they point into ``parquet_dir`` instead,
    then clears the module-level DuckDB client singleton. Call it once,
    before the first ``Ticker()`` is created.

    Args:
        parquet_dir: Directory holding one ``<table>.parquet`` file per table
            plus ``company_tickers.json``. A leading ``~`` is expanded to the
            user's home directory and the result is made absolute. The
            directory itself is not checked here; a missing table file is
            reported when that table is first requested.

    Raises:
        ImportError: If defeatbeta_api cannot be imported.
    """
    # expanduser() must run before resolve(): resolve() alone treats "~" as
    # an ordinary directory name and would yield "<cwd>/~/...".
    local_dir = Path(parquet_dir).expanduser().resolve()

    # Importing these submodules triggers defeatbeta_api/__init__.py on first
    # run (prints the welcome banner — one network call). After that it's a
    # no-op because _welcome_printed is True.
    from defeatbeta_api.client.hugging_face_client import HuggingFaceClient
    import defeatbeta_api.client.duckdb_client as _duckdb_mod
    from defeatbeta_api.client.duckdb_client import DuckDBClient
    from defeatbeta_api.client.duckdb_conf import Configuration
    from defeatbeta_api.data.company_meta import CompanyMeta
    from defeatbeta_api.utils.util import validate_memory_limit

    # 1. Redirect every table URL to a local parquet file.
    def _local_url(self, table: str) -> str:
        """Return the local path of ``<table>.parquet`` or raise if absent."""
        path = local_dir / f"{table}.parquet"
        if not path.exists():
            raise FileNotFoundError(
                f"Local parquet not found: {path}\n"
                f"Run download_data.py first."
            )
        return str(path)

    HuggingFaceClient.get_url_path = _local_url

    # 2. Return a fixed update time (used by beta() and the welcome banner).
    HuggingFaceClient.get_data_update_time = lambda self: "offline"

    # 3. Skip the startup cache-validation (hits HuggingFace spec.json).
    DuckDBClient._validate_httpfs_cache = lambda self: None

    # 4. Skip "INSTALL cache_httpfs FROM community" (hits DuckDB extension
    #    registry) and all the cache_httpfs SET GLOBAL lines that follow —
    #    not needed for local files. Keep only memory and thread settings.
    def _minimal_settings(self):
        """Return only the memory-limit and thread-count SET statements."""
        return [
            f"SET GLOBAL memory_limit = '{validate_memory_limit(self.memory_limit)}'",
            f"SET GLOBAL threads = {self.threads}",
        ]

    Configuration.get_duckdb_settings = _minimal_settings

    # 5. Redirect company_tickers.json to the local file.
    CompanyMeta.COMPANY_TICKERS_URL = str(local_dir / "company_tickers.json")

    # 6. Reset the DuckDB singleton so the next Ticker() call reinitialises
    #    using the patched Configuration (no cache_httpfs install/load).
    _duckdb_mod._instance = None

    print(f"[offline] defeatbeta_api patched → reading from {local_dir}")