From 6d66355a22fe23905d9645819c74f8d9fc47e7c0 Mon Sep 17 00:00:00 2001 From: Saravanakumar D Date: Sat, 30 May 2026 23:28:12 -0700 Subject: [PATCH] feat(scripts): add Cosmos DB cost report tooling (.sh + .ps1) Reports billed cost, RU by database, RU by container drill-down, and storage for the cosmos-mywisprai account. Auto-detects serverless vs provisioned billing mode. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- scripts/cosmos-cost-report.ps1 | 194 ++++++++++++++++++++++++++++++++ scripts/cosmos-cost-report.sh | 200 +++++++++++++++++++++++++++++++++ 2 files changed, 394 insertions(+) create mode 100644 scripts/cosmos-cost-report.ps1 create mode 100755 scripts/cosmos-cost-report.sh diff --git a/scripts/cosmos-cost-report.ps1 b/scripts/cosmos-cost-report.ps1 new file mode 100644 index 00000000..7a68b2ac --- /dev/null +++ b/scripts/cosmos-cost-report.ps1 @@ -0,0 +1,194 @@ +#Requires -Version 5.1 +<# +.SYNOPSIS + Cosmos DB — Cost / RU-consumption report. + +.DESCRIPTION + Identifies which database (product) and which container drives the most + Azure Cosmos DB cost. For SERVERLESS accounts cost is dominated by Request + Units (RU) consumed; for PROVISIONED accounts it is the provisioned + throughput. The billing mode is auto-detected. + + Reports: + 1. Account billing mode (serverless vs provisioned) + region + 2. Actual billed cost (Cost Management, last 30d) by service + 3. RU consumption by database, ranked, with est. monthly $ + 4. RU consumption by container for the top databases + 5. Storage (DataUsage) by database + +.PARAMETER Account + Cosmos account name. Falls back to $env:COSMOS_ACCOUNT_NAME. + +.PARAMETER ResourceGroup + Resource group. Falls back to $env:COSMOS_RESOURCE_GROUP. + +.PARAMETER Days + Lookback window (days) for RU metrics. Default 7 (or $env:DAYS). + +.PARAMETER Top + Rows per table. Default 15 (or $env:TOP). + +.PARAMETER Drill + Number of top databases to drill into by container. Default 3. 
+ +.PARAMETER Rate + USD per 1M RU (serverless). Default 0.25 (or $env:SERVERLESS_RU_RATE). + +.EXAMPLE + $env:COSMOS_ACCOUNT_NAME='cosmos-mywisprai'; $env:COSMOS_RESOURCE_GROUP='rg-mywisprai' + ./scripts/cosmos-cost-report.ps1 + +.EXAMPLE + ./scripts/cosmos-cost-report.ps1 -Account cosmos-mywisprai -ResourceGroup rg-mywisprai -Days 7 + +.NOTES + Prerequisites: Azure CLI installed and authenticated (az login). +#> +[CmdletBinding()] +param( + [string]$Account = $env:COSMOS_ACCOUNT_NAME, + [string]$ResourceGroup = $env:COSMOS_RESOURCE_GROUP, + [int] $Days = $(if ($env:DAYS) { [int]$env:DAYS } else { 7 }), + [int] $Top = $(if ($env:TOP) { [int]$env:TOP } else { 15 }), + [int] $Drill = 3, + [double]$Rate = $(if ($env:SERVERLESS_RU_RATE) { [double]$env:SERVERLESS_RU_RATE } else { 0.25 }) +) + +$ErrorActionPreference = 'Stop' + +if (-not $Account) { Write-Error 'Set COSMOS_ACCOUNT_NAME or pass -Account'; exit 2 } +if (-not $ResourceGroup) { Write-Error 'Set COSMOS_RESOURCE_GROUP or pass -ResourceGroup'; exit 2 } +if (-not (Get-Command az -ErrorAction SilentlyContinue)) { Write-Error 'az CLI not found'; exit 2 } + +$start = (Get-Date).ToUniversalTime().AddDays(-$Days).ToString('yyyy-MM-ddTHH:mm:ssZ') +$end = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') + +Write-Host '════════════════════════════════════════════════════════════════' +Write-Host ' Cosmos DB Cost Report' +Write-Host " Account: $Account Resource group: $ResourceGroup" +Write-Host " RU window: last ${Days}d ($start -> $end)" +Write-Host '════════════════════════════════════════════════════════════════' + +$rid = az cosmosdb show -n $Account -g $ResourceGroup --query id -o tsv +$caps = az cosmosdb show -n $Account -g $ResourceGroup --query "capabilities[].name" -o tsv +$region = az cosmosdb show -n $Account -g $ResourceGroup --query "locations[0].locationName" -o tsv +$mode = if ($caps -match 'EnableServerless') { 'SERVERLESS' } else { 'PROVISIONED' } + +Write-Host '' +Write-Host "▶ Billing 
mode : $mode" +Write-Host "▶ Region : $region" +Write-Host ("▶ Est. RU rate : `$$Rate per 1M RU (serverless)") + +# ── 1. Actual billed cost (Cost Management, last 30d) ───────────── +Write-Host '' +Write-Host '── Actual billed cost — last 30d by service (resource group) ──' +$sub = az account show --query id -o tsv +$cmFrom = (Get-Date).ToUniversalTime().AddDays(-30).ToString('yyyy-MM-ddT00:00:00Z') +$cmTo = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddT00:00:00Z') +$cmBody = @{ + type = 'ActualCost' + timeframe = 'Custom' + timePeriod = @{ from = $cmFrom; to = $cmTo } + dataset = @{ + granularity = 'None' + aggregation = @{ totalCost = @{ name = 'Cost'; function = 'Sum' } } + grouping = @(@{ type = 'Dimension'; name = 'ServiceName' }) + } +} | ConvertTo-Json -Depth 10 -Compress +$tmp = New-TemporaryFile +Set-Content -Path $tmp -Value $cmBody -NoNewline +try { + $rows = az rest --method post ` + --url "https://management.azure.com/subscriptions/$sub/resourceGroups/$ResourceGroup/providers/Microsoft.CostManagement/query?api-version=2023-11-01" ` + --body "@$tmp" --headers "Content-Type=application/json" ` + --query "properties.rows" -o json 2>$null | ConvertFrom-Json + if ($rows) { + $rows | Sort-Object { [double]$_[0] } -Descending | ForEach-Object { + ' {0,-28} ${1,8:N2} {2}' -f ([string]$_[1]).Substring(0,[Math]::Min(28,([string]$_[1]).Length)), [double]$_[0], $_[2] + } + } else { + Write-Host ' (no cost data — needs Cost Management reader on the subscription)' + } +} catch { + Write-Host ' (cost query unavailable — continuing with RU metrics)' +} finally { + Remove-Item $tmp -ErrorAction SilentlyContinue +} + +# Helper: query TotalRequestUnits split by a dimension, return [pscustomobject]@{ k; ru } +function Get-RuByDimension { + param([string]$Filter) + $json = az monitor metrics list --resource $rid --metric TotalRequestUnits ` + --aggregation Total --interval P1D --start-time $start --end-time $end ` + --filter $Filter --top 500 ` + --query 
"value[0].timeseries[].{k: metadatavalues[0].value, ru: sum(data[].total)}" ` + -o json 2>$null + if (-not $json) { return @() } + $parsed = $json | ConvertFrom-Json + if (-not $parsed) { return @() } + ,@($parsed | ForEach-Object { + [pscustomobject]@{ k = $(if ($_.k) { $_.k } else { '' }); ru = [double]($_.ru) } + }) +} + +function Show-RuTable { + param([object[]]$Rows) + $ranked = @($Rows | Sort-Object ru -Descending) + $total = ($ranked | Measure-Object ru -Sum).Sum + if (-not $total -or $total -eq 0) { $total = 1.0 } + ' {0,-28} {1,16} {2,8} {3,10}' -f 'name', "RU (${Days}d)", 'share', 'est $/mo' | Write-Host + ' ' + ('-' * 68) | Write-Host + foreach ($r in ($ranked | Select-Object -First $Top)) { + $est = $r.ru / $Days * 30.0 / 1000000.0 * $Rate + $share = 100.0 * $r.ru / $total + $name = ([string]$r.k).Substring(0,[Math]::Min(28,([string]$r.k).Length)) + ' {0,-28} {1,16} {2,7:N1}% {3,9:N2}' -f $name, ('{0:N0}' -f $r.ru), $share, $est | Write-Host + } + $proj = $total / $Days * 30.0 / 1000000.0 * $Rate + ' ' + ('-' * 68) | Write-Host + ' {0,-28} {1,16} {2,8} {3,9:N2}' -f 'TOTAL', ('{0:N0}' -f $total), '', $proj | Write-Host +} + +# ── 2. RU by database ───────────────────────────────────────────── +Write-Host '' +Write-Host "── RU consumption by database (product) — last ${Days}d ──" +$dbRows = Get-RuByDimension -Filter "DatabaseName eq '*'" +Show-RuTable -Rows $dbRows + +# ── 3. Drill into the top databases by container ────────────────── +$topDbs = @($dbRows | Where-Object { $_.ru -gt 0 -and $_.k -ne '' } | + Sort-Object ru -Descending | Select-Object -First $Drill -ExpandProperty k) +foreach ($db in $topDbs) { + Write-Host '' + Write-Host "── RU by container in '$db' — last ${Days}d ──" + $coll = Get-RuByDimension -Filter "DatabaseName eq '$db' and CollectionName eq '*'" + Show-RuTable -Rows $coll +} + +# ── 4. 
Storage by database ──────────────────────────────────────── +Write-Host '' +Write-Host '── Storage (DataUsage) by database — latest snapshot ──' +$storeJson = az monitor metrics list --resource $rid --metric DataUsage ` + --aggregation Total --interval PT1H --start-time $start --end-time $end ` + --filter "DatabaseName eq '*'" --top 200 ` + --query "value[0].timeseries[].{k: metadatavalues[0].value, b: max(data[].total)}" ` + -o json 2>$null +$store = if ($storeJson) { @($storeJson | ConvertFrom-Json) } else { @() } +if ($store.Count -eq 0) { + Write-Host ' (no storage data)' +} else { + foreach ($s in ($store | Sort-Object { [double]$_.b } -Descending)) { + $name = ([string]$s.k).Substring(0,[Math]::Min(28,([string]$s.k).Length)) + ' {0,-28} {1,10:N2} MB' -f $name, ([double]$s.b / 1MB) | Write-Host + } +} + +Write-Host '' +Write-Host '════════════════════════════════════════════════════════════════' +Write-Host ' Notes:' +Write-Host " - Serverless cost ~= RU consumed x `$$Rate/1M + storage(`$~0.25/GB-mo)." +Write-Host " - 'est `$/mo' linearly projects the ${Days}d window to 30 days." +Write-Host ' - High RU + low request count => expensive per-op (cross-partition' +Write-Host ' queries / large docs) — prime rightsizing target.' +Write-Host ' - A *_locks container burning RU is usually lock-polling overhead.' +Write-Host '════════════════════════════════════════════════════════════════' diff --git a/scripts/cosmos-cost-report.sh b/scripts/cosmos-cost-report.sh new file mode 100755 index 00000000..ba6e6a90 --- /dev/null +++ b/scripts/cosmos-cost-report.sh @@ -0,0 +1,200 @@ +#!/usr/bin/env bash +# ────────────────────────────────────────────────────────────────── +# Cosmos DB — Cost / RU-consumption report +# +# Identifies which database (product) and which container drives the +# most Azure Cosmos DB cost. For SERVERLESS accounts cost is dominated +# by Request Units (RU) consumed; for PROVISIONED accounts it is the +# provisioned throughput. 
# The script auto-detects the billing mode.
#
# What it reports:
#   1. Account billing mode (serverless vs provisioned) + region
#   2. Actual billed cost (Cost Management, last 30d) by service
#   3. RU consumption by database, ranked, with est. monthly $
#   4. RU consumption by container for the top databases
#   5. Storage (DataUsage) by database
#
# Prerequisites:
#   - Azure CLI installed and authenticated (az login)
#   - python3 on PATH (used for JSON shaping + date math)
#   - COSMOS_ACCOUNT_NAME and COSMOS_RESOURCE_GROUP set (or pass flags)
#
# Usage:
#   COSMOS_ACCOUNT_NAME=cosmos-mywisprai COSMOS_RESOURCE_GROUP=rg-mywisprai \
#     ./scripts/cosmos-cost-report.sh
#   ./scripts/cosmos-cost-report.sh -a cosmos-mywisprai -g rg-mywisprai -d 7
#
# Flags (override env):
#   -a, --account         Cosmos account name          (COSMOS_ACCOUNT_NAME)
#   -g, --resource-group  Resource group               (COSMOS_RESOURCE_GROUP)
#   -d, --days            Lookback window for RU       (DAYS, default 7)
#   -t, --top             Rows per table               (TOP, default 15)
#       --drill           #DBs to drill into           (DRILL, default 3)
#       --rate            USD per 1M RU (serverless)   (SERVERLESS_RU_RATE, 0.25)
#   -h, --help            Print this header and exit
# ──────────────────────────────────────────────────────────────────

set -euo pipefail

# Defaults come from the environment; the flags parsed below override them.
ACCOUNT="${COSMOS_ACCOUNT_NAME:-}"
RG="${COSMOS_RESOURCE_GROUP:-}"
DAYS="${DAYS:-7}"
TOP="${TOP:-15}"
DRILL="${DRILL:-3}"
RU_RATE="${SERVERLESS_RU_RATE:-0.25}"

#######################################
# Print the comment header of this script as help text: every line after the
# shebang up to (not including) the first line that is not a comment. Using
# the comment run instead of a fixed line range keeps code out of the output
# when the header grows or shrinks.
# Outputs: header comment lines on stdout
#######################################
usage() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
}

#######################################
# Abort when an option that takes a value is the last word on the command
# line. Without this, "$2" trips `set -u` with a bare "unbound variable".
# Arguments: $1 - option name, $2 - number of remaining arguments ($#)
#######################################
require_value() {
  if (( $2 < 2 )); then
    echo "ERROR: $1 requires a value" >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    -a|--account)        require_value "$1" "$#"; ACCOUNT="$2"; shift 2 ;;
    -g|--resource-group) require_value "$1" "$#"; RG="$2"; shift 2 ;;
    -d|--days)           require_value "$1" "$#"; DAYS="$2"; shift 2 ;;
    -t|--top)            require_value "$1" "$#"; TOP="$2"; shift 2 ;;
    --drill)             require_value "$1" "$#"; DRILL="$2"; shift 2 ;;
    --rate)              require_value "$1" "$#"; RU_RATE="$2"; shift 2 ;;
    -h|--help)           usage; exit 0 ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
done

if [[ -z "$ACCOUNT" ]]; then
  echo "ERROR: set COSMOS_ACCOUNT_NAME or pass -a" >&2
  exit 2
fi
if [[ -z "$RG" ]]; then
  echo "ERROR: set COSMOS_RESOURCE_GROUP or pass -g" >&2
  exit 2
fi

# Validate numeric settings up front. They are later handed to python3 as
# int()/float() arguments and DAYS is used as a divisor, so a bad value would
# otherwise surface as a Python traceback (or ZeroDivisionError for 0 days).
int_re='^[0-9]+$'
num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][-+]?[0-9]+)?$'
if ! [[ "$DAYS" =~ $int_re ]] || (( 10#$DAYS < 1 )); then
  echo "ERROR: --days must be a positive integer (got '$DAYS')" >&2
  exit 2
fi
if ! [[ "$TOP" =~ $int_re ]]; then
  echo "ERROR: --top must be a non-negative integer (got '$TOP')" >&2
  exit 2
fi
if ! [[ "$DRILL" =~ $int_re ]]; then
  echo "ERROR: --drill must be a non-negative integer (got '$DRILL')" >&2
  exit 2
fi
if ! [[ "$RU_RATE" =~ $num_re ]]; then
  echo "ERROR: --rate must be a non-negative number (got '$RU_RATE')" >&2
  exit 2
fi

command -v az >/dev/null || { echo "ERROR: az CLI not found" >&2; exit 2; }
command -v python3 >/dev/null || { echo "ERROR: python3 not found" >&2; exit 2; }

#######################################
# Print a UTC timestamp for "now minus N days".
# Uses a timezone-aware now(): datetime.utcnow() is deprecated since
# Python 3.12 and writes a DeprecationWarning to stderr on every call.
# Arguments: $1 - days to subtract (integer, 0 for now)
#            $2 - strftime format string
# Outputs:   the formatted timestamp on stdout
#######################################
utc_stamp() {
  python3 -c '
import datetime, sys
moment = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=int(sys.argv[1]))
print(moment.strftime(sys.argv[2]))
' "$1" "$2"
}

# Metric query window: [now - DAYS, now], in UTC.
START="$(utc_stamp "$DAYS" '%Y-%m-%dT%H:%M:%SZ')"
END="$(utc_stamp 0 '%Y-%m-%dT%H:%M:%SZ')"

echo "════════════════════════════════════════════════════════════════"
echo " Cosmos DB Cost Report"
echo " Account: $ACCOUNT Resource group: $RG"
echo " RU window: last ${DAYS}d ($START → $END)"
echo "════════════════════════════════════════════════════════════════"

# Resource id (used by the metric queries), capability names and first region.
RID="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query id -o tsv)"
# Capabilities are joined with commas; an account without capabilities yields
# an empty string, hence the `|| true`.
CAPS="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query "capabilities[].name" -o tsv | tr '\n' ',' || true)"
REGION="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query "locations[0].locationName" -o tsv)"

# The account is treated as serverless when it lists the EnableServerless
# capability, and as provisioned otherwise.
if [[ "$CAPS" == *EnableServerless* ]]; then
  MODE="SERVERLESS"
else
  MODE="PROVISIONED"
fi
echo ""
echo "▶ Billing mode : $MODE"
echo "▶ Region : $REGION"
echo "▶ Est. RU rate : \$$RU_RATE per 1M RU (serverless)"

# ── 1. 
Actual billed cost (Cost Management, last 30d) ───────────── +echo "" +echo "── Actual billed cost — last 30d by service (resource group) ──" +SUB="$(az account show --query id -o tsv)" +CM_FROM="$(python3 -c 'import datetime;print((datetime.datetime.utcnow()-datetime.timedelta(days=30)).strftime("%Y-%m-%dT00:00:00Z"))')" +CM_TO="$(python3 -c 'import datetime;print(datetime.datetime.utcnow().strftime("%Y-%m-%dT00:00:00Z"))')" +CM_BODY="$(mktemp)" +cat >"$CM_BODY" </dev/null | \ + python3 -c ' +import json,sys +try: rows=json.load(sys.stdin) or [] +except Exception: rows=[] +rows=[r for r in rows if isinstance(r,list) and len(r)>=2] +rows.sort(key=lambda r: float(r[0]), reverse=True) +if not rows: + print(" (no cost data — needs Cost Management reader on the subscription)") +for r in rows: + print(" %-28s $%8.2f %s" % (str(r[1])[:28], float(r[0]), r[2] if len(r)>2 else "")) +'; then :; else + echo " (cost query unavailable — continuing with RU metrics)" +fi +rm -f "$CM_BODY" + +# Helper: query a TotalRequestUnits metric split by a dimension and emit +# "\t" lines, ranked desc, with est monthly $. 
#######################################
# Query the TotalRequestUnits metric split by a dimension.
# Globals:   RID, START, END (read)
# Arguments: $1 - metric filter expression, e.g. "DatabaseName eq '*'"
#            $2 - index into each series' metadata values used as the row key
#                 when $3 is empty or not present in the series (default 0)
#            $3 - optional dimension name (case-insensitive) whose value is
#                 used as the row key. Preferred over the index because a
#                 filter on several dimensions returns several metadata
#                 values per series.
# Outputs:   JSON array of {"k": <dimension value>, "ru": <sum of totals>}
#            on stdout; "[]" when the query fails or returns nothing.
# Returns:   non-zero when the az call fails (with pipefail enabled)
#######################################
ru_table() {
  local filter="$1" dimidx="${2:-0}" dimname="${3:-}"
  az monitor metrics list --resource "$RID" --metric TotalRequestUnits \
    --aggregation Total --interval P1D --start-time "$START" --end-time "$END" \
    --filter "$filter" --top 500 \
    --query "value[0].timeseries[].{dims: metadatavalues[].{n: name.value, v: value}, ru: sum(data[].total)}" \
    -o json 2>/dev/null \
    | python3 -c '
import json, sys
idx = int(sys.argv[1])
wanted = sys.argv[2].lower()
try:
    series = json.load(sys.stdin) or []
except Exception:
    series = []
out = []
for s in series:
    dims = s.get("dims") or []
    key = None
    if wanted:
        for d in dims:
            if str(d.get("n") or "").lower() == wanted:
                key = d.get("v")
                break
    if key is None and 0 <= idx < len(dims):
        key = dims[idx].get("v")
    out.append({"k": key or "", "ru": s.get("ru") or 0})
print(json.dumps(out))
' "$dimidx" "$dimname"
}

#######################################
# Render a ranked RU table from ru_table output.
# Globals:   DAYS, RU_RATE, TOP (read)
# Inputs:    JSON array of {"k", "ru"} on stdin; invalid or empty input is
#            rendered as an empty table
# Outputs:   header, up to TOP rows sorted by RU descending, and a TOTAL row.
#            "est $/mo" projects the DAYS window linearly to 30 days at
#            RU_RATE USD per 1M RU.
#######################################
render_ru() {
  python3 -c '
import json, sys
days = float(sys.argv[1]); rate = float(sys.argv[2]); top = int(sys.argv[3])
try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []
rows = [(it.get("k") or "", float(it.get("ru") or 0)) for it in items]
rows.sort(key=lambda x: x[1], reverse=True)
total = sum(r[1] for r in rows)
# Only the share computation needs a non-zero divisor; the TOTAL row shows
# the real sum so an empty result reads 0 rather than 1.
denom = total or 1.0
print(" %-28s %16s %8s %10s" % ("name", "RU (" + str(int(days)) + "d)", "share", "est $/mo"))
print(" " + "-" * 68)
for name, ru in rows[:top]:
    est = ru / days * 30.0 / 1_000_000.0 * rate
    print(" %-28s %16s %7.1f%% %9.2f" % (name[:28], "{:,.0f}".format(ru), 100 * ru / denom, est))
proj = total / days * 30.0 / 1_000_000.0 * rate
print(" " + "-" * 68)
print(" %-28s %16s %8s %9.2f" % ("TOTAL", "{:,.0f}".format(total), "", proj))
' "$DAYS" "$RU_RATE" "$TOP"
}

# ── 2. RU by database ─────────────────────────────────────────────
echo ""
echo "── RU consumption by database (product) — last ${DAYS}d ──"
# A failed metrics query must not abort the report under `set -e`; its stderr
# is discarded inside ru_table, so say so here and carry on with no rows.
DB_JSON="$(ru_table "DatabaseName eq '*'" 0 DatabaseName)" || {
  echo "WARN: RU-by-database query failed; showing an empty table" >&2
  DB_JSON='[]'
}
printf '%s' "$DB_JSON" | render_ru

# ── 3. Drill into the top databases by container ──────────────────
# One database name per line: the DRILL highest-RU entries, skipping unnamed
# series and series with no consumption.
TOP_DBS="$(printf '%s' "$DB_JSON" | python3 -c '
import json, sys
n = int(sys.argv[1])
try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []
items = [(i.get("k") or "", float(i.get("ru") or 0)) for i in items]
items.sort(key=lambda x: x[1], reverse=True)
for k, ru in items[:n]:
    if k and ru > 0:
        print(k)
' "$DRILL")"

# Read line by line so database names containing spaces stay intact.
while IFS= read -r DB; do
  [[ -n "$DB" ]] || continue
  echo ""
  echo "── RU by container in '$DB' — last ${DAYS}d ──"
  COLL_JSON="$(ru_table "DatabaseName eq '$DB' and CollectionName eq '*'" 0 CollectionName)" || {
    echo "WARN: RU-by-container query failed for '$DB'; showing an empty table" >&2
    COLL_JSON='[]'
  }
  printf '%s' "$COLL_JSON" | render_ru
done <<<"$TOP_DBS"

# ── 4. 
# Storage by database ────────────────────────────────────────

#######################################
# Render the storage table.
# Inputs:  JSON array of {"k": <database>, "b": <bytes>} on stdin; invalid or
#          empty input is treated as no rows
# Outputs: one line per database, largest first, in MB (bytes / 1024 / 1024);
#          " (no storage data)" when there are no rows
#######################################
render_storage() {
  python3 -c '
import json, sys
try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []
rows = [(i.get("k") or "", float(i.get("b") or 0)) for i in items]
rows.sort(key=lambda x: x[1], reverse=True)
for name, b in rows:
    print(" %-28s %10.2f MB" % (name[:28], b / 1024 / 1024))
if not rows:
    print(" (no storage data)")
'
}

echo ""
echo "── Storage (DataUsage) by database — latest snapshot ──"
# The value shown per database is the maximum hourly total inside the window.
# Capture the query result first: with `set -e` and pipefail a failing az call
# inside a pipeline would end the script right after "(no storage data)" and
# the closing notes below would never be printed.
STORAGE_JSON="$(az monitor metrics list --resource "$RID" --metric DataUsage \
  --aggregation Total --interval PT1H --start-time "$START" --end-time "$END" \
  --filter "DatabaseName eq '*'" --top 200 \
  --query "value[0].timeseries[].{k: metadatavalues[0].value, b: max(data[].total)}" \
  -o json 2>/dev/null)" || STORAGE_JSON='[]'
printf '%s' "$STORAGE_JSON" | render_storage

echo ""
echo "════════════════════════════════════════════════════════════════"
echo " Notes:"
echo " - Serverless cost ≈ RU consumed × \$$RU_RATE/1M + storage(\$~0.25/GB-mo)."
echo " - 'est \$/mo' linearly projects the ${DAYS}d window to 30 days."
echo " - High RU + low request count ⇒ expensive per-op (cross-partition"
echo " queries / large docs) — prime rightsizing target."
echo " - A *_locks container burning RU is usually lock-polling overhead."
echo "════════════════════════════════════════════════════════════════"