learning_ai_common_plat/scripts/cosmos-cost-report.sh
Saravanakumar D 6d66355a22 feat(scripts): add Cosmos DB cost report tooling (.sh + .ps1)
Reports billed cost, RU by database, RU by container drill-down, and
storage for the cosmos-mywisprai account. Auto-detects serverless vs
provisioned billing mode.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-30 23:30:22 -07:00

201 lines
9.5 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# ──────────────────────────────────────────────────────────────────
# Cosmos DB — Cost / RU-consumption report
#
# Identifies which database (product) and which container drives the
# most Azure Cosmos DB cost. For SERVERLESS accounts cost is dominated
# by Request Units (RU) consumed; for PROVISIONED accounts it is the
# provisioned throughput. The script auto-detects the billing mode.
#
# What it reports:
# 1. Account billing mode (serverless vs provisioned) + region
# 2. Actual billed cost (Cost Management, last 30d) by service
# 3. RU consumption by database, ranked, with est. monthly $
# 4. RU consumption by container for the top databases
# 5. Storage (DataUsage) by database
#
# Prerequisites:
# - Azure CLI installed and authenticated (az login)
# - python3 on PATH (used for JSON shaping + date math)
# - COSMOS_ACCOUNT_NAME and COSMOS_RESOURCE_GROUP set (or pass flags)
#
# Usage:
# COSMOS_ACCOUNT_NAME=cosmos-mywisprai COSMOS_RESOURCE_GROUP=rg-mywisprai \
# ./scripts/cosmos-cost-report.sh
# ./scripts/cosmos-cost-report.sh -a cosmos-mywisprai -g rg-mywisprai -d 7
#
# Flags (override env):
# -a, --account Cosmos account name (COSMOS_ACCOUNT_NAME)
# -g, --resource-group Resource group (COSMOS_RESOURCE_GROUP)
# -d, --days Lookback window for RU (DAYS, default 7)
# -t, --top Rows per table (TOP, default 15)
# --drill #DBs to drill into (DRILL, default 3)
# --rate USD per 1M RU (serverless) (SERVERLESS_RU_RATE, default 0.25)
# ──────────────────────────────────────────────────────────────────
# Strict mode: stop on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Starting values come from the environment; the flag parser further
# down may overwrite any of them.
ACCOUNT="${COSMOS_ACCOUNT_NAME-}"
RG="${COSMOS_RESOURCE_GROUP-}"
: "${DAYS:=7}" "${TOP:=15}" "${DRILL:=3}"
RU_RATE="${SERVERLESS_RU_RATE:-0.25}"
# Print the comment header at the top of this script as help text.
# Starts after the shebang and stops at the first non-comment line, so
# the output follows the header even if its length changes.
print_help() {
  awk 'NR > 1 && /^#/ { print; next } NR > 1 { exit }' "$0"
}

# Apply command-line flags on top of the environment-derived settings.
# Arguments: the script's positional parameters ("$@").
# Globals written: ACCOUNT, RG, DAYS, TOP, DRILL, RU_RATE.
# Exits 0 after printing help; exits 2 on an unknown flag or on a flag
# that is missing its value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        print_help
        exit 0
        ;;
      -a|--account|-g|--resource-group|-d|--days|-t|--top|--drill|--rate)
        # Check the value exists before touching "$2": under `set -u` a
        # missing value would otherwise die with "unbound variable".
        if [[ $# -lt 2 ]]; then
          echo "ERROR: option $1 requires a value" >&2
          exit 2
        fi
        case "$1" in
          -a|--account)        ACCOUNT="$2" ;;
          -g|--resource-group) RG="$2" ;;
          -d|--days)           DAYS="$2" ;;
          -t|--top)            TOP="$2" ;;
          --drill)             DRILL="$2" ;;
          --rate)              RU_RATE="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# Preconditions: the target account and resource group must be known,
# and both external tools must be on PATH. Every failure exits 2.
if [[ -z "$ACCOUNT" ]]; then
  echo "ERROR: set COSMOS_ACCOUNT_NAME or pass -a" >&2
  exit 2
fi
if [[ -z "$RG" ]]; then
  echo "ERROR: set COSMOS_RESOURCE_GROUP or pass -g" >&2
  exit 2
fi
command -v az >/dev/null || { echo "ERROR: az CLI not found" >&2; exit 2; }
command -v python3 >/dev/null || { echo "ERROR: python3 not found" >&2; exit 2; }

# The numeric settings are handed to python3 int()/float() later on, and
# DAYS is also the divisor of the monthly projection. Reject bad values
# here with a clear message instead of a traceback (or a division by
# zero for DAYS=0) in the middle of the report.
uint_re='^[0-9]+$'
posint_re='^[1-9][0-9]*$'
number_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][-+]?[0-9]+)?$'
if [[ ! "$DAYS" =~ $posint_re ]]; then
  echo "ERROR: --days must be a positive integer (got '$DAYS')" >&2
  exit 2
fi
if [[ ! "$TOP" =~ $uint_re ]]; then
  echo "ERROR: --top must be a non-negative integer (got '$TOP')" >&2
  exit 2
fi
if [[ ! "$DRILL" =~ $uint_re ]]; then
  echo "ERROR: --drill must be a non-negative integer (got '$DRILL')" >&2
  exit 2
fi
if [[ ! "$RU_RATE" =~ $number_re ]]; then
  echo "ERROR: --rate must be a non-negative number (got '$RU_RATE')" >&2
  exit 2
fi
# RU lookback window as ISO-8601 UTC timestamps. Both ends are derived
# from one clock read so the span is exactly DAYS days, and a
# timezone-aware "now" is used because datetime.utcnow() is deprecated
# and warns on recent Python versions.
WINDOW="$(python3 -c '
import datetime, sys

end = datetime.datetime.now(datetime.timezone.utc)
start = end - datetime.timedelta(days=int(sys.argv[1]))
fmt = "%Y-%m-%dT%H:%M:%SZ"
print(start.strftime(fmt), end.strftime(fmt))
' "$DAYS")"
START="${WINDOW%% *}"
END="${WINDOW##* }"

# Report banner.
echo "════════════════════════════════════════════════════════════════"
echo " Cosmos DB Cost Report"
echo " Account: $ACCOUNT Resource group: $RG"
echo " RU window: last ${DAYS}d ($START → $END)"
echo "════════════════════════════════════════════════════════════════"
# Look up the account: its resource id (used as the metrics target), its
# capability names, and the name of its first listed location.
RID="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query id -o tsv)"
# Capabilities are flattened to one comma-separated line. `|| true`
# keeps a failure of this lookup from aborting the script; CAPS is then
# empty and the mode falls back to PROVISIONED.
CAPS="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query "capabilities[].name" -o tsv | tr '\n' ',' || true)"
REGION="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query "locations[0].locationName" -o tsv)"

# Billing mode is inferred from the EnableServerless capability. A shell
# pattern match is used rather than `echo | grep -q`: with pipefail on,
# grep exiting early can fail the pipeline and misreport the mode.
if [[ "$CAPS" == *EnableServerless* ]]; then
  MODE="SERVERLESS"
else
  MODE="PROVISIONED"
fi

echo ""
echo "▶ Billing mode : $MODE"
echo "▶ Region : $REGION"
echo "▶ Est. RU rate : \$$RU_RATE per 1M RU (serverless)"
# ── 1. Actual billed cost (Cost Management, last 30d) ─────────────

# Render Cost Management query rows as a table, highest cost first.
# Stdin:  JSON array of rows; row[0] is the cost, row[1] the service
#         name, and row[2] (optional) is printed after the amount.
# Stdout: one line per row, or a "no cost data" note when nothing
#         usable was received (empty, malformed or non-list input).
# Rows that are too short or whose cost is not numeric are skipped.
render_cost_rows() {
  python3 -c '
import json, sys

try:
    payload = json.load(sys.stdin) or []
except Exception:
    payload = []
if not isinstance(payload, list):
    payload = []

rows = []
for r in payload:
    if not (isinstance(r, list) and len(r) >= 2):
        continue
    try:
        cost = float(r[0])
    except (TypeError, ValueError):
        continue
    rows.append((cost, str(r[1]), r[2] if len(r) > 2 else ""))
rows.sort(key=lambda row: row[0], reverse=True)

if not rows:
    print(" (no cost data — needs Cost Management reader on the subscription)")
for cost, service, extra in rows:
    print(" %-28s $%8.2f %s" % (service[:28], cost, extra))
'
}

echo ""
echo "── Actual billed cost — last 30d by service (resource group) ──"
SUB="$(az account show --query id -o tsv)"

# Query period: midnight UTC 30 days ago up to midnight UTC today, both
# taken from one clock read (timezone-aware; utcnow() is deprecated).
CM_RANGE="$(python3 -c '
import datetime

now = datetime.datetime.now(datetime.timezone.utc)
fmt = "%Y-%m-%dT00:00:00Z"
print((now - datetime.timedelta(days=30)).strftime(fmt), now.strftime(fmt))
')"
CM_FROM="${CM_RANGE%% *}"
CM_TO="${CM_RANGE##* }"

# The request body is passed inline, so there is no temp file to leak
# when the script is interrupted or exits early.
CM_BODY="$(printf '{"type":"ActualCost","timeframe":"Custom","timePeriod":{"from":"%s","to":"%s"},"dataset":{"granularity":"None","aggregation":{"totalCost":{"name":"Cost","function":"Sum"}},"grouping":[{"type":"Dimension","name":"ServiceName"}]}}' "$CM_FROM" "$CM_TO")"

# Best effort: a failed query only prints a note and the report goes on.
if ! az rest --method post \
  --url "https://management.azure.com/subscriptions/$SUB/resourceGroups/$RG/providers/Microsoft.CostManagement/query?api-version=2023-11-01" \
  --body "$CM_BODY" --headers "Content-Type=application/json" \
  --query "properties.rows" -o json 2>/dev/null \
  | render_cost_rows; then
  echo " (cost query unavailable — continuing with RU metrics)"
fi
# Helper: fetch the TotalRequestUnits metric for the account, split by a
# dimension, over the START..END window in daily (P1D) buckets.
# Arguments: $1 - filter expression passed to --filter
#            $2 - index into each series' metadatavalues used as the key
# Globals:   RID, START, END (read)
# Outputs:   JSON array of {k, ru} objects on stdout, where ru is the sum
#            of the series' daily totals. Not sorted. az's stderr is
#            discarded.
# Returns:   az's exit status.
ru_table() {
  local odata_filter="$1"
  local dim_pos="$2"
  local projection="value[0].timeseries[].{k: metadatavalues[${dim_pos}].value, ru: sum(data[].total)}"
  local -a request=(
    monitor metrics list
    --resource "$RID"
    --metric TotalRequestUnits
    --aggregation Total
    --interval P1D
    --start-time "$START"
    --end-time "$END"
    --filter "$odata_filter"
    --top 500
    --query "$projection"
    -o json
  )
  az "${request[@]}" 2>/dev/null
}
# Render RU totals as a ranked table with share and projected monthly $.
# Stdin:   JSON array of {k, ru} objects; empty, malformed or non-list
#          input is treated as "no data".
# Globals: DAYS (window length and projection divisor), RU_RATE (USD per
#          1M RU), TOP (maximum rows printed).
# Stdout:  header, up to TOP rows sorted by RU descending, and a TOTAL
#          row covering every series, not only the rows shown.
# Returns: non-zero with a one-line error if DAYS is not greater than 0.
render_ru() {
  python3 -c '
import json, sys

days = float(sys.argv[1])
rate = float(sys.argv[2])
top = int(sys.argv[3])
if days <= 0:
    sys.exit("ERROR: lookback window must be greater than 0 days (got %s)" % sys.argv[1])

try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []
if not isinstance(items, list):
    items = []

rows = []
for it in items:
    if not isinstance(it, dict):
        continue
    rows.append((str(it.get("k") or "<none>"), float(it.get("ru") or 0)))
rows.sort(key=lambda row: row[1], reverse=True)


def monthly(ru):
    # Linear projection of the window to 30 days, priced per 1M RU.
    return ru / days * 30.0 / 1_000_000.0 * rate


# Keep the true total for display; only the share calculation needs
# protecting against a zero denominator.
total = sum(ru for _, ru in rows)

print(" %-28s %16s %8s %10s" % ("name", "RU (" + str(int(days)) + "d)", "share", "est $/mo"))
print(" " + "-" * 68)
if not rows:
    print(" (no RU data in window)")
for name, ru in rows[:top]:
    share = 100 * ru / total if total else 0.0
    print(" %-28s %16s %7.1f%% %9.2f" % (name[:28], "{:,.0f}".format(ru), share, monthly(ru)))
print(" " + "-" * 68)
print(" %-28s %16s %8s %9.2f" % ("TOTAL", "{:,.0f}".format(total), "", monthly(total)))
' "$DAYS" "$RU_RATE" "$TOP"
}
# ── 2. RU by database ─────────────────────────────────────────────
echo ""
echo "── RU consumption by database (product) — last ${DAYS}d ──"
# ru_table discards az's stderr, so under `set -e` a failed query inside
# a plain assignment would end the script without any message. Catch the
# failure, say so, and continue with an empty data set.
if ! DB_JSON="$(ru_table "DatabaseName eq '*'" 0)"; then
  echo " (RU metrics query failed — continuing)"
  DB_JSON='[]'
fi
printf '%s\n' "$DB_JSON" | render_ru
# ── 3. Drill into the top databases by container ──────────────────
# Pick the names of the DRILL highest-RU databases, one per line, and
# skip entries with no name or no RU.
TOP_DBS="$(printf '%s\n' "$DB_JSON" | python3 -c '
import json, sys

n = int(sys.argv[1])
try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []
ranked = [(i.get("k") or "", float(i.get("ru") or 0)) for i in items]
ranked.sort(key=lambda x: x[1], reverse=True)
for name, ru in ranked[:n]:
    if name and ru > 0:
        print(name)
' "$DRILL")"

# Read the names line by line on fd 3, so a name containing spaces or
# glob characters stays one item and the loop body keeps its own stdin.
while IFS= read -r DB <&3; do
  [[ -n "$DB" ]] || continue
  echo ""
  echo "── RU by container in '$DB' — last ${DAYS}d ──"
  # NOTE(review): index 0 assumes the container dimension is the first
  # metadatavalues entry when two dimensions are filtered — confirm
  # against a live response.
  # A failed query for one database is reported and skipped instead of
  # ending the whole report under `set -e` / pipefail.
  if CONTAINER_JSON="$(ru_table "DatabaseName eq '$DB' and CollectionName eq '*'" 0)"; then
    printf '%s\n' "$CONTAINER_JSON" | render_ru
  else
    echo " (container metrics query failed for '$DB' — continuing)"
  fi
done 3<<<"$TOP_DBS"
# ── 4. Storage by database ────────────────────────────────────────
echo ""
echo "── Storage (DataUsage) by database — latest snapshot ──"
# DataUsage is fetched hourly over the RU window, and each database
# reports the largest hourly value in that window (max), in MB.
# NOTE(review): the heading says "latest snapshot" but the value is the
# window maximum — confirm which one is intended.
# Best effort: with pipefail a failed az call fails the pipeline, so it
# is caught here and the report still reaches the closing notes.
if ! az monitor metrics list --resource "$RID" --metric DataUsage \
  --aggregation Total --interval PT1H --start-time "$START" --end-time "$END" \
  --filter "DatabaseName eq '*'" --top 200 \
  --query "value[0].timeseries[].{k: metadatavalues[0].value, b: max(data[].total)}" \
  -o json 2>/dev/null | python3 -c '
import json, sys

try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []
rows = [(i.get("k") or "<none>", float(i.get("b") or 0)) for i in items]
rows.sort(key=lambda x: x[1], reverse=True)
for name, b in rows:
    print(" %-28s %10.2f MB" % (name[:28], b / 1024 / 1024))
if not rows:
    print(" (no storage data)")
'; then
  echo " (storage query unavailable — continuing)"
fi
# Closing notes: how to read the figures above. The heredoc is unquoted
# so RU_RATE and DAYS expand; literal dollar signs are backslash-escaped.
cat <<EOF

════════════════════════════════════════════════════════════════
 Notes:
 - Serverless cost ≈ RU consumed × \$$RU_RATE/1M + storage(\$~0.25/GB-mo).
 - 'est \$/mo' linearly projects the ${DAYS}d window to 30 days.
 - High RU + low request count ⇒ expensive per-op (cross-partition
 queries / large docs) — prime rightsizing target.
 - A *_locks container burning RU is usually lock-polling overhead.
════════════════════════════════════════════════════════════════
EOF