Replace hardcoded DB credentials with environment-driven configuration.

Centralize DB settings in ingestion config, remove embedded secrets from ingestion helpers, and add an idempotent PostgreSQL bootstrap script to create role/database and apply schema safely.

Made-with: Cursor
This commit is contained in:
David Doebel
2026-04-02 16:30:50 +02:00
parent e9b3a4aac3
commit b3663258e4
10 changed files with 189 additions and 35 deletions

14
.env.example Normal file
View File

@@ -0,0 +1,14 @@
DB_HOST=localhost
DB_PORT=5432
DB_NAME=options_db
DB_USER=quant_user
DB_PASSWORD=change_me
PIPELINE_SYMBOLS=SPY
# For scripts/setup_postgres.py when creating role/database:
# Use a superuser/admin account that can CREATE ROLE and CREATE DATABASE.
POSTGRES_ADMIN_USER=postgres
POSTGRES_ADMIN_PASSWORD=postgres
POSTGRES_ADMIN_HOST=localhost
POSTGRES_ADMIN_PORT=5432
POSTGRES_ADMIN_DB=postgres

108
scripts/setup_postgres.py Normal file
View File

@@ -0,0 +1,108 @@
#!/usr/bin/env python3
"""
Idempotent PostgreSQL bootstrap script for the option_pricing project.
What it does:
1) Creates the project role if it does not exist.
2) Creates the project database if it does not exist.
3) Grants ownership/privileges.
4) Applies src/data/sql/schema.sql to the project database.
Configuration comes from environment variables (see .env.example).
"""
from __future__ import annotations
import os
from pathlib import Path
import psycopg2
from psycopg2 import sql
ROOT = Path(__file__).resolve().parents[1]
SCHEMA_PATH = ROOT / "src" / "data" / "sql" / "schema.sql"
def _env(name: str, default: str | None = None) -> str:
value = os.getenv(name, default)
if value is None:
raise RuntimeError(f"Missing required environment variable: {name}")
return value
def admin_connect(dbname: str):
return psycopg2.connect(
dbname=dbname,
user=_env("POSTGRES_ADMIN_USER", "postgres"),
password=_env("POSTGRES_ADMIN_PASSWORD", "postgres"),
host=_env("POSTGRES_ADMIN_HOST", "localhost"),
port=_env("POSTGRES_ADMIN_PORT", "5432"),
)
def ensure_role_and_database() -> None:
db_user = _env("DB_USER", "quant_user")
db_password = _env("DB_PASSWORD", "")
db_name = _env("DB_NAME", "options_db")
admin_db = _env("POSTGRES_ADMIN_DB", "postgres")
with admin_connect(admin_db) as conn:
conn.autocommit = True
with conn.cursor() as cur:
cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (db_user,))
role_exists = cur.fetchone() is not None
if not role_exists:
cur.execute(
sql.SQL("CREATE ROLE {} WITH LOGIN PASSWORD %s").format(
sql.Identifier(db_user)
),
(db_password,),
)
else:
cur.execute(
sql.SQL("ALTER ROLE {} WITH LOGIN PASSWORD %s").format(
sql.Identifier(db_user)
),
(db_password,),
)
cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
db_exists = cur.fetchone() is not None
if not db_exists:
cur.execute(
sql.SQL("CREATE DATABASE {} OWNER {}").format(
sql.Identifier(db_name),
sql.Identifier(db_user),
)
)
else:
cur.execute(
sql.SQL("ALTER DATABASE {} OWNER TO {}").format(
sql.Identifier(db_name),
sql.Identifier(db_user),
)
)
def apply_schema() -> None:
if not SCHEMA_PATH.exists():
raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}")
schema_sql = SCHEMA_PATH.read_text(encoding="utf-8")
with admin_connect(_env("DB_NAME", "options_db")) as conn:
conn.autocommit = True
with conn.cursor() as cur:
cur.execute(schema_sql)
def main() -> None:
print("Ensuring role/database exist...")
ensure_role_and_database()
print("Applying schema...")
apply_schema()
print("Database setup complete.")
if __name__ == "__main__":
main()

View File

@@ -1,14 +0,0 @@
DB_CONFIG = {
"host": "localhost",
"port": 5432,
"database": "options_db",
"user": "quant_user",
"password": "strong_password",
}
PIPELINE_CONFIG = {
"symbols": [
"SPY"
# Example: "SPY"
]
}

View File

@@ -1,13 +1,15 @@
import psycopg2
import pandas as pd
conn = psycopg2.connect(
dbname="options_db",
user="quant_user",
password="strong_password",
host="144.91.73.49",
port="5432"
)
from option_pricing.src.data.ingestion.db_connect import db_engine
cursor = conn.cursor()
cursor.execute("SELECT * FROM underlyings;")
print(cursor.fetchall())
def fetch_underlyings() -> pd.DataFrame:
"""
Fetch all entries from the underlyings table using configured DB credentials.
"""
engine = db_engine()
return pd.read_sql("SELECT * FROM underlyings;", engine)
if __name__ == "__main__":
print(fetch_underlyings())

View File

@@ -0,0 +1,3 @@
from .settings import DB_CONFIG, PIPELINE_CONFIG
__all__ = ["DB_CONFIG", "PIPELINE_CONFIG"]

View File

@@ -0,0 +1,31 @@
import os
def _get_env_int(name: str, default: int) -> int:
raw = os.getenv(name)
if raw is None:
return default
try:
return int(raw)
except ValueError as exc:
raise ValueError(f"Environment variable {name} must be an integer, got '{raw}'") from exc
def _get_env_list(name: str, default: list[str]) -> list[str]:
raw = os.getenv(name)
if not raw:
return default
return [x.strip() for x in raw.split(",") if x.strip()]
DB_CONFIG = {
"host": os.getenv("DB_HOST", "localhost"),
"port": _get_env_int("DB_PORT", 5432),
"database": os.getenv("DB_NAME", "options_db"),
"user": os.getenv("DB_USER", "quant_user"),
"password": os.getenv("DB_PASSWORD", ""),
}
PIPELINE_CONFIG = {
"symbols": _get_env_list("PIPELINE_SYMBOLS", ["SPY"]),
}

View File

@@ -0,0 +1,13 @@
from sqlalchemy import create_engine
from option_pricing.src.data.ingestion.config.settings import DB_CONFIG
def build_db_url() -> str:
return (
f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
)
def db_engine():
db_url = build_db_url()
engine = create_engine(db_url, future=True)
return engine

View File

@@ -1,16 +1,14 @@
from datetime import datetime, timedelta
import pandas as pd
import yfinance as yf
from sqlalchemy import create_engine
from db_connect import db_engine
# --- CONFIG ---
TICKERS = ["UBS", "^GSPC"]
DAYS_BACK = 21 # ~3 weeks
TABLE_NAME = "prices"
DB_URI = "postgresql://quant_user:strong_password@localhost:5432/options_db"
def fetch_data(tickers, start_date, end_date):
data = yf.download(
tickers,
@@ -64,7 +62,7 @@ def main():
raw = fetch_data(TICKERS, start_date, end_date)
df = transform_data(raw)
engine = create_engine(DB_URI)
engine = db_engine()
load_to_postgres(df, engine)
print("Ingestion complete.")

View File

@@ -1,11 +1,11 @@
from datetime import datetime, timezone
from decimal import Decimal, InvalidOperation
import pandas as pd
import yfinance as yf
from sqlalchemy import create_engine, text
from sqlalchemy import text
from config.settings import DB_CONFIG, PIPELINE_CONFIG
from option_pricing.src.data.ingestion.config import DB_CONFIG, PIPELINE_CONFIG
from db_connect import db_engine
def build_db_url() -> str:
@@ -269,8 +269,7 @@ def ingest_symbol(symbol: str, engine):
def main():
db_url = build_db_url()
engine = create_engine(db_url, future=True)
engine = db_engine()
for symbol in PIPELINE_CONFIG["symbols"]:
ingest_symbol(symbol, engine)