#!/usr/bin/env python3 """Extract token/cost usage from OpenClaw / Clawdbot session JSONL. - Prefers real cost from usage.cost.total when present. - Falls back to estimation using PRICE_* env vars if cost missing. Usage examples: extract_cost.py --today extract_cost.py --last-days 7 extract_cost.py --top-sessions 10 extract_cost.py --last-days 30 --json Env (optional fallback pricing, per 1M tokens): PRICE_INPUT, PRICE_OUTPUT, PRICE_CACHE_READ, PRICE_CACHE_WRITE """ from __future__ import annotations import argparse import dataclasses import datetime as dt import glob import json import os import sys from collections import defaultdict @dataclasses.dataclass class UsageRow: ts: dt.datetime session_file: str session_id: str | None provider: str | None model: str | None input_tokens: int output_tokens: int cache_read: int cache_write: int cost_total: float | None cost_breakdown: dict | None def _parse_dt(s: str) -> dt.datetime | None: try: # 2026-01-28T06:15:42.062Z if s.endswith("Z"): s = s[:-1] + "+00:00" return dt.datetime.fromisoformat(s) except Exception: return None def _env_price(name: str) -> float | None: v = os.environ.get(name) if not v: return None try: return float(v) except Exception: return None def estimate_cost(row: UsageRow) -> float | None: pin = _env_price("PRICE_INPUT") pout = _env_price("PRICE_OUTPUT") pcr = _env_price("PRICE_CACHE_READ") pcw = _env_price("PRICE_CACHE_WRITE") if pin is None or pout is None: return None def per_million(tokens: int, price: float | None) -> float: if price is None: return 0.0 return (tokens / 1_000_000.0) * price return ( per_million(row.input_tokens, pin) + per_million(row.output_tokens, pout) + per_million(row.cache_read, pcr) + per_million(row.cache_write, pcw) ) def iter_jsonl_files() -> list[str]: home = os.path.expanduser("~") paths = [] paths += glob.glob(os.path.join(home, ".openclaw", "agents", "*", "sessions", "*.jsonl")) paths += glob.glob(os.path.join(home, ".clawdbot", "agents", "*", "sessions", "*.jsonl")) return sorted(set(paths)) def extract_rows(files: list[str]) -> list[UsageRow]: rows: list[UsageRow] = [] for fp in files: session_id = None provider = None model = None try: with open(fp, "r", encoding="utf-8", errors="replace") as f: for line in f: line = line.strip() if not line: continue try: j = json.loads(line) except Exception: continue t = j.get("timestamp") if isinstance(t, str): ts = _parse_dt(t) else: ts = None typ = j.get("type") if typ == "session": session_id = j.get("id") continue if typ == "model_change": provider = j.get("provider") model = j.get("modelId") continue # OpenClaw stores usage under j["message"]["usage"]. # Some logs may also include top-level usage. usage = None if isinstance(j.get("usage"), dict): usage = j.get("usage") elif isinstance(j.get("message"), dict) and isinstance(j["message"].get("usage"), dict): usage = j["message"].get("usage") if not isinstance(usage, dict) or ts is None: continue cost = usage.get("cost") if isinstance(usage.get("cost"), dict) else None cost_total = None if cost and isinstance(cost.get("total"), (int, float)): cost_total = float(cost.get("total")) rows.append( UsageRow( ts=ts, session_file=fp, session_id=session_id, provider=j.get("provider") or provider, model=j.get("model") or model, input_tokens=int(usage.get("input") or 0), output_tokens=int(usage.get("output") or 0), cache_read=int(usage.get("cacheRead") or 0), cache_write=int(usage.get("cacheWrite") or 0), cost_total=cost_total, cost_breakdown=cost, ) ) except FileNotFoundError: continue return rows def main() -> int: ap = argparse.ArgumentParser() g = ap.add_mutually_exclusive_group(required=True) g.add_argument("--today", action="store_true") g.add_argument("--yesterday", action="store_true") g.add_argument("--last-days", type=int) ap.add_argument("--top-sessions", type=int, default=0) ap.add_argument("--json", action="store_true") # Budget alerts ap.add_argument("--budget-usd", type=float, default=0.0, help="If >0, warn/exit nonzero when total cost exceeds this budget") ap.add_argument("--budget-mode", choices=["warn", "exit"], default="exit", help="warn: print alert; exit: exit code 2 on breach") args = ap.parse_args() now = dt.datetime.now(dt.timezone.utc) if args.today: start = now.date() end = start elif args.yesterday: start = (now - dt.timedelta(days=1)).date() end = start else: days = int(args.last_days) start = (now - dt.timedelta(days=days - 1)).date() end = now.date() files = iter_jsonl_files() rows = extract_rows(files) # Filter by date range def in_range(r: UsageRow) -> bool: d = r.ts.date() return start <= d <= end rows = [r for r in rows if in_range(r)] daily = defaultdict(lambda: {"calls": 0, "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0, "cost": 0.0, "costEstimated": 0.0, "missingCostCalls": 0}) # Session aggregation per_session = defaultdict(lambda: {"calls": 0, "cost": 0.0, "costEstimated": 0.0, "input": 0, "output": 0, "file": None, "provider": None, "model": None}) for r in rows: d = r.ts.date().isoformat() daily[d]["calls"] += 1 daily[d]["input"] += r.input_tokens daily[d]["output"] += r.output_tokens daily[d]["cacheRead"] += r.cache_read daily[d]["cacheWrite"] += r.cache_write sid = r.session_id or os.path.basename(r.session_file) ps = per_session[sid] ps["calls"] += 1 ps["input"] += r.input_tokens ps["output"] += r.output_tokens ps["file"] = r.session_file ps["provider"] = r.provider ps["model"] = r.model if r.cost_total is not None: daily[d]["cost"] += r.cost_total ps["cost"] += r.cost_total else: daily[d]["missingCostCalls"] += 1 est = estimate_cost(r) if est is not None: daily[d]["costEstimated"] += est ps["costEstimated"] += est # Total cost across selected range (for budgets) total_cost = 0.0 for v in daily.values(): total_cost += float(v["cost"] + v["costEstimated"]) budget_breached = args.budget_usd and total_cost > float(args.budget_usd) # Output if args.json: out = { "range": {"start": str(start), "end": str(end)}, "days": dict(sorted(daily.items())), "total": {"cost": total_cost}, } if args.budget_usd: out["budget"] = { "usd": float(args.budget_usd), "breached": bool(budget_breached), "mode": args.budget_mode, } if args.top_sessions: top = sorted(per_session.items(), key=lambda kv: (kv[1]["cost"] + kv[1]["costEstimated"]), reverse=True)[: int(args.top_sessions)] out["topSessions"] = [ { "session": k, **v, "total": v["cost"] + v["costEstimated"], } for k, v in top ] print(json.dumps(out, indent=2)) if budget_breached and args.budget_mode == "exit": return 2 return 0 # Text output (compact) for d in sorted(daily.keys()): v = daily[d] total = v["cost"] + v["costEstimated"] suffix = "" if v["missingCostCalls"] == 0 else f" (missingCostCalls={v['missingCostCalls']})" print(f"{d} calls={v['calls']} cost=${total:.4f}{suffix}") if args.budget_usd: status = "OK" if not budget_breached else "BREACHED" print(f"\nBudget: ${float(args.budget_usd):.2f} Total: ${total_cost:.4f} Status: {status}") if budget_breached: print("ALERT: budget exceeded") if args.top_sessions: print("\nTop sessions:") top = sorted(per_session.items(), key=lambda kv: (kv[1]["cost"] + kv[1]["costEstimated"]), reverse=True)[: int(args.top_sessions)] for sid, v in top: total = v["cost"] + v["costEstimated"] short = os.path.basename(v.get("file") or "") print(f"- ${total:.4f} calls={v['calls']} {sid} ({short})") if budget_breached and args.budget_mode == "exit": return 2 return 0 if __name__ == "__main__": raise SystemExit(main())