feat(scripts): add Cosmos DB cost report tooling (.sh + .ps1)

Reports billed cost, RU by database, RU by container drill-down, and
storage for the cosmos-mywisprai account. Auto-detects serverless vs
provisioned billing mode.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Saravanakumar D 2026-05-30 23:28:12 -07:00
parent cad75b1c02
commit 6d66355a22
2 changed files with 394 additions and 0 deletions

View File

@ -0,0 +1,194 @@
#Requires -Version 5.1
<#
.SYNOPSIS
    Cosmos DB Cost / RU-consumption report.
.DESCRIPTION
    Identifies which database (product) and which container drives the most
    Azure Cosmos DB cost. For SERVERLESS accounts cost is dominated by Request
    Units (RU) consumed; for PROVISIONED accounts it is the provisioned
    throughput. The billing mode is auto-detected.
    Reports:
      1. Account billing mode (serverless vs provisioned) + region
      2. Actual billed cost (Cost Management, last 30d) by service
      3. RU consumption by database, ranked, with est. monthly $
      4. RU consumption by container for the top databases
      5. Storage (DataUsage) by database
.PARAMETER Account
    Cosmos account name. Falls back to $env:COSMOS_ACCOUNT_NAME.
.PARAMETER ResourceGroup
    Resource group. Falls back to $env:COSMOS_RESOURCE_GROUP.
.PARAMETER Days
    Lookback window (days) for RU metrics. Default 7 (or $env:DAYS).
.PARAMETER Top
    Rows per table. Default 15 (or $env:TOP).
.PARAMETER Drill
    Number of top databases to drill into by container. Default 3 (or $env:DRILL).
.PARAMETER Rate
    USD per 1M RU (serverless). Default 0.25 (or $env:SERVERLESS_RU_RATE).
.EXAMPLE
    $env:COSMOS_ACCOUNT_NAME='cosmos-mywisprai'; $env:COSMOS_RESOURCE_GROUP='rg-mywisprai'
    ./scripts/cosmos-cost-report.ps1
.EXAMPLE
    ./scripts/cosmos-cost-report.ps1 -Account cosmos-mywisprai -ResourceGroup rg-mywisprai -Days 7
.NOTES
    Prerequisites: Azure CLI installed and authenticated (az login).
#>
[CmdletBinding()]
param(
    # Account and resource group have no built-in default; they are checked
    # for emptiness by the script body.
    [string]$Account       = $env:COSMOS_ACCOUNT_NAME,
    [string]$ResourceGroup = $env:COSMOS_RESOURCE_GROUP,

    # Numeric options: an explicit argument wins, then the environment
    # variable, then the built-in default.
    [int]   $Days  = $(if ($env:DAYS)  { [int]$env:DAYS }  else { 7 }),
    [int]   $Top   = $(if ($env:TOP)   { [int]$env:TOP }   else { 15 }),
    # DRILL is honoured here as well so both report scripts accept the same
    # environment variables.
    [int]   $Drill = $(if ($env:DRILL) { [int]$env:DRILL } else { 3 }),
    [double]$Rate  = $(if ($env:SERVERLESS_RU_RATE) { [double]$env:SERVERLESS_RU_RATE } else { 0.25 })
)
$ErrorActionPreference = 'Stop'

# Usage / prerequisite failures exit with code 2. The explicit
# -ErrorAction Continue matters: under the 'Stop' preference a plain
# Write-Error raises a terminating error, so the `exit 2` that follows it
# would never run and the intended exit code would be lost.
if (-not $Account) { Write-Error 'Set COSMOS_ACCOUNT_NAME or pass -Account' -ErrorAction Continue; exit 2 }
if (-not $ResourceGroup) { Write-Error 'Set COSMOS_RESOURCE_GROUP or pass -ResourceGroup' -ErrorAction Continue; exit 2 }
if (-not (Get-Command az -ErrorAction SilentlyContinue)) { Write-Error 'az CLI not found' -ErrorAction Continue; exit 2 }
# The monthly projection divides by $Days, so a zero/negative window is rejected up front.
if ($Days -lt 1) { Write-Error '-Days must be 1 or greater' -ErrorAction Continue; exit 2 }

# RU metrics window in UTC. The invariant culture keeps the ':' separators and
# the Gregorian calendar regardless of the machine's regional settings.
$invariant = [System.Globalization.CultureInfo]::InvariantCulture
$nowUtc = (Get-Date).ToUniversalTime()
$start = $nowUtc.AddDays(-$Days).ToString('yyyy-MM-ddTHH:mm:ssZ', $invariant)
$end = $nowUtc.ToString('yyyy-MM-ddTHH:mm:ssZ', $invariant)

Write-Host '════════════════════════════════════════════════════════════════'
Write-Host ' Cosmos DB Cost Report'
Write-Host " Account: $Account Resource group: $ResourceGroup"
Write-Host " RU window: last ${Days}d ($start -> $end)"
Write-Host '════════════════════════════════════════════════════════════════'

# One `az cosmosdb show` call supplies the resource id, capabilities and
# region. Native commands do not throw on failure, so the exit code is
# checked explicitly instead of continuing with an empty resource id.
$acctJson = az cosmosdb show -n $Account -g $ResourceGroup -o json
if ($LASTEXITCODE -ne 0 -or -not $acctJson) {
    Write-Error "az cosmosdb show failed for account '$Account' in resource group '$ResourceGroup'" -ErrorAction Continue
    exit 1
}
$acct = ($acctJson | Out-String) | ConvertFrom-Json
$rid = $acct.id
$caps = @($acct.capabilities | ForEach-Object { $_.name })
$region = @($acct.locations)[0].locationName

# Serverless accounts carry the EnableServerless capability; anything else is
# reported as provisioned.
$mode = if ($caps -match 'EnableServerless') { 'SERVERLESS' } else { 'PROVISIONED' }
Write-Host ''
Write-Host "▶ Billing mode : $mode"
Write-Host "▶ Region : $region"
Write-Host ("▶ Est. RU rate : `$$Rate per 1M RU (serverless)")
# ── 1. Actual billed cost (Cost Management, last 30d) ─────────────
# Best-effort section: any failure prints a one-line notice and the report
# continues with the RU metrics.
Write-Host ''
Write-Host '── Actual billed cost — last 30d by service (resource group) ──'
$sub = az account show --query id -o tsv

# Whole-day UTC boundaries, formatted with the invariant culture so regional
# settings cannot change the separators or the calendar.
$cmCulture = [System.Globalization.CultureInfo]::InvariantCulture
$cmNow = (Get-Date).ToUniversalTime()
$cmFrom = $cmNow.AddDays(-30).ToString('yyyy-MM-ddT00:00:00Z', $cmCulture)
$cmTo = $cmNow.ToString('yyyy-MM-ddT00:00:00Z', $cmCulture)

$cmBody = @{
    type       = 'ActualCost'
    timeframe  = 'Custom'
    timePeriod = @{ from = $cmFrom; to = $cmTo }
    dataset    = @{
        granularity = 'None'
        aggregation = @{ totalCost = @{ name = 'Cost'; function = 'Sum' } }
        grouping    = @(@{ type = 'Dimension'; name = 'ServiceName' })
    }
} | ConvertTo-Json -Depth 10 -Compress

$tmp = New-TemporaryFile
try {
    Set-Content -Path $tmp -Value $cmBody -NoNewline

    # The child scope relaxes the error preference only for the native call:
    # with 'Stop' in effect, Windows PowerShell 5.1 can promote stderr text
    # discarded by 2>$null to a terminating error.
    $raw = & {
        $ErrorActionPreference = 'Continue'
        az rest --method post `
            --url "https://management.azure.com/subscriptions/$sub/resourceGroups/$ResourceGroup/providers/Microsoft.CostManagement/query?api-version=2023-11-01" `
            --body "@$tmp" --headers "Content-Type=application/json" `
            --query "properties.rows" -o json 2>$null
    }
    if ($LASTEXITCODE -ne 0) { throw "az rest exited with code $LASTEXITCODE" }

    $parsed = if ($raw) { ($raw | Out-String) | ConvertFrom-Json } else { $null }

    # Normalise to an array of rows. ConvertFrom-Json emits a JSON array as
    # one object on 5.1 but element by element on 7+, so a single-row result
    # arrives on 7+ as the bare row [cost, service, currency].
    $rows = @()
    if ($null -ne $parsed) {
        $items = @($parsed)
        if ($items.Count -gt 0) {
            if ($items[0] -is [array]) { $rows = $items } else { $rows = , $items }
        }
    }

    if ($rows.Count -gt 0) {
        $rows | Sort-Object { [double]$_[0] } -Descending | ForEach-Object {
            $service = [string]$_[1]
            if ($service.Length -gt 28) { $service = $service.Substring(0, 28) }
            Write-Host (' {0,-28} ${1,8:N2} {2}' -f $service, ([double]$_[0]), $_[2])
        }
    } else {
        Write-Host ' (no cost data — needs Cost Management reader on the subscription)'
    }
} catch {
    Write-Host ' (cost query unavailable — continuing with RU metrics)'
} finally {
    Remove-Item $tmp -ErrorAction SilentlyContinue
}
# Helper: query TotalRequestUnits split by a dimension.
# Reads the script-level $rid, $start and $end.
# Returns an array (possibly empty) of [pscustomobject]@{ k; ru }, where k is
# the first metadata value of each time series ('<none>' when missing) and ru
# is the summed total as a double. The array is always returned as a single
# object, so the caller receives an array even when there are no rows.
# NOTE(review): the row name is always metadatavalues[0], also for the
# two-dimension container drill-down — confirm that index is the collection name.
function Get-RuByDimension {
    param([string]$Filter)

    # Function-local override: with 'Stop' in effect, Windows PowerShell 5.1
    # can promote stderr text discarded by 2>$null to a terminating error.
    $ErrorActionPreference = 'Continue'

    $json = az monitor metrics list --resource $rid --metric TotalRequestUnits `
        --aggregation Total --interval P1D --start-time $start --end-time $end `
        --filter $Filter --top 500 `
        --query "value[0].timeseries[].{k: metadatavalues[0].value, ru: sum(data[].total)}" `
        -o json 2>$null

    # A failed or empty query is reported as "no rows" rather than an error.
    if ($LASTEXITCODE -ne 0 -or -not $json) { return , @() }

    $parsed = ($json | Out-String) | ConvertFrom-Json
    $rows = @($parsed | Where-Object { $null -ne $_ } | ForEach-Object {
        [pscustomobject]@{
            k  = $(if ($_.k) { $_.k } else { '<none>' })
            ru = [double]($_.ru)
        }
    })
    return , $rows
}
# Renders a ranked RU table (name, RU, share, est. $/mo) followed by a TOTAL row.
# $Rows: objects with a `k` (name) and `ru` (request units) property.
# Reads the script-level $Days (window length), $Top (row limit) and $Rate
# (USD per 1M RU). Writes to the host only; returns nothing.
function Show-RuTable {
    param([object[]]$Rows)

    if ($Days -le 0) { throw 'Show-RuTable: $Days must be greater than 0' }

    $ranked = @($Rows | Where-Object { $null -ne $_ } | Sort-Object ru -Descending)

    # Keep the real sum for display and use a separate non-zero divisor for
    # the share column, so an empty table reports a total of 0 RU rather than
    # the placeholder value.
    $sum = [double](($ranked | Measure-Object ru -Sum).Sum)
    $shareBase = if ($sum -gt 0) { $sum } else { 1.0 }

    $rule = ' ' + ('-' * 68)
    Write-Host (' {0,-28} {1,16} {2,8} {3,10}' -f 'name', "RU (${Days}d)", 'share', 'est $/mo')
    Write-Host $rule
    foreach ($r in ($ranked | Select-Object -First $Top)) {
        # Linear projection of the window to 30 days, priced per 1M RU.
        $est = $r.ru / $Days * 30.0 / 1000000.0 * $Rate
        $share = 100.0 * $r.ru / $shareBase
        $name = [string]$r.k
        if ($name.Length -gt 28) { $name = $name.Substring(0, 28) }
        Write-Host (' {0,-28} {1,16} {2,7:N1}% {3,9:N2}' -f $name, ('{0:N0}' -f $r.ru), $share, $est)
    }
    $proj = $sum / $Days * 30.0 / 1000000.0 * $Rate
    Write-Host $rule
    Write-Host (' {0,-28} {1,16} {2,8} {3,9:N2}' -f 'TOTAL', ('{0:N0}' -f $sum), '', $proj)
}
# ── 2. RU by database ─────────────────────────────────────────────
# One table covering every database in the account.
Write-Host ''
Write-Host "── RU consumption by database (product) — last ${Days}d ──"
$dbRows = Get-RuByDimension -Filter "DatabaseName eq '*'"
Show-RuTable -Rows $dbRows

# ── 3. Drill into the top databases by container ──────────────────
# Only named databases that actually consumed RU qualify; the $Drill heaviest
# of them each get a per-container table.
$topDbs = @(
    $dbRows |
        Where-Object { $_.k -ne '<none>' -and $_.ru -gt 0 } |
        Sort-Object -Property ru -Descending |
        Select-Object -ExpandProperty k -First $Drill
)
$topDbs | ForEach-Object {
    $db = $_
    Write-Host ''
    Write-Host "── RU by container in '$db' — last ${Days}d ──"
    $coll = Get-RuByDimension -Filter "DatabaseName eq '$db' and CollectionName eq '*'"
    Show-RuTable -Rows $coll
}
# ── 4. Storage by database ────────────────────────────────────────
# Lists the largest DataUsage value seen per database inside the RU window,
# biggest first, converted from bytes to MB.
Write-Host ''
Write-Host '── Storage (DataUsage) by database — latest snapshot ──'

# The child scope relaxes the error preference only for the native call: with
# 'Stop' in effect, Windows PowerShell 5.1 can promote stderr text discarded
# by 2>$null to a terminating error.
$storeJson = & {
    $ErrorActionPreference = 'Continue'
    az monitor metrics list --resource $rid --metric DataUsage `
        --aggregation Total --interval PT1H --start-time $start --end-time $end `
        --filter "DatabaseName eq '*'" --top 200 `
        --query "value[0].timeseries[].{k: metadatavalues[0].value, b: max(data[].total)}" `
        -o json 2>$null
}

# Parse into a variable first and flatten afterwards: wrapping the
# ConvertFrom-Json pipeline directly in @() gives a one-element array holding
# the whole result on 5.1, which defeats the empty check and the row loop.
$storeParsed = $null
if ($LASTEXITCODE -eq 0 -and $storeJson) {
    $storeParsed = ($storeJson | Out-String) | ConvertFrom-Json
}
$store = @($storeParsed | Where-Object { $null -ne $_ })

if ($store.Count -eq 0) {
    Write-Host ' (no storage data)'
} else {
    foreach ($s in ($store | Sort-Object { [double]$_.b } -Descending)) {
        $name = [string]$s.k
        if ($name.Length -gt 28) { $name = $name.Substring(0, 28) }
        Write-Host (' {0,-28} {1,10:N2} MB' -f $name, ([double]$s.b / 1MB))
    }
}
# Closing notes: how to read the estimates printed above. The lines are
# collected first and then written to the host one by one.
$noteRule = '════════════════════════════════════════════════════════════════'
@(
    ''
    $noteRule
    ' Notes:'
    " - Serverless cost ~= RU consumed x `$$Rate/1M + storage(`$~0.25/GB-mo)."
    " - 'est `$/mo' linearly projects the ${Days}d window to 30 days."
    ' - High RU + low request count => expensive per-op (cross-partition'
    ' queries / large docs) — prime rightsizing target.'
    ' - A *_locks container burning RU is usually lock-polling overhead.'
    $noteRule
) | ForEach-Object { Write-Host $_ }

200
scripts/cosmos-cost-report.sh Executable file
View File

@ -0,0 +1,200 @@
#!/usr/bin/env bash
# ──────────────────────────────────────────────────────────────────
# Cosmos DB — Cost / RU-consumption report
#
# Identifies which database (product) and which container drives the
# most Azure Cosmos DB cost. For SERVERLESS accounts cost is dominated
# by Request Units (RU) consumed; for PROVISIONED accounts it is the
# provisioned throughput. The script auto-detects the billing mode.
#
# What it reports:
# 1. Account billing mode (serverless vs provisioned) + region
# 2. Actual billed cost (Cost Management, last 30d) by service
# 3. RU consumption by database, ranked, with est. monthly $
# 4. RU consumption by container for the top databases
# 5. Storage (DataUsage) by database
#
# Prerequisites:
# - Azure CLI installed and authenticated (az login)
# - python3 on PATH (used for JSON shaping + date math)
# - COSMOS_ACCOUNT_NAME and COSMOS_RESOURCE_GROUP set (or pass flags)
#
# Usage:
# COSMOS_ACCOUNT_NAME=cosmos-mywisprai COSMOS_RESOURCE_GROUP=rg-mywisprai \
# ./scripts/cosmos-cost-report.sh
# ./scripts/cosmos-cost-report.sh -a cosmos-mywisprai -g rg-mywisprai -d 7
#
# Flags (override env):
# -a, --account Cosmos account name (COSMOS_ACCOUNT_NAME)
# -g, --resource-group Resource group (COSMOS_RESOURCE_GROUP)
# -d, --days Lookback window for RU (DAYS, default 7)
# -t, --top Rows per table (TOP, default 15)
# --drill #DBs to drill into (DRILL, default 3)
# --rate USD per 1M RU (serverless) (SERVERLESS_RU_RATE, 0.25)
# ──────────────────────────────────────────────────────────────────
set -euo pipefail

# Defaults come from the environment; command-line flags override them below.
ACCOUNT="${COSMOS_ACCOUNT_NAME:-}"
RG="${COSMOS_RESOURCE_GROUP:-}"
DAYS="${DAYS:-7}"
TOP="${TOP:-15}"
DRILL="${DRILL:-3}"
RU_RATE="${SERVERLESS_RU_RATE:-0.25}"

while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      # Print the header comment (everything after the shebang up to the
      # first non-comment line) rather than a fixed line range, so the help
      # text cannot run past the header into code.
      awk 'NR == 1 { next } !/^#/ { exit } { print }' "$0"
      exit 0
      ;;
    -a|--account|-g|--resource-group|-d|--days|-t|--top|--drill|--rate)
      # Under `set -u` a trailing flag with no value would otherwise die with
      # an opaque "unbound variable" error.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: $1 requires a value" >&2
        exit 2
      fi
      case "$1" in
        -a|--account)        ACCOUNT="$2" ;;
        -g|--resource-group) RG="$2" ;;
        -d|--days)           DAYS="$2" ;;
        -t|--top)            TOP="$2" ;;
        --drill)             DRILL="$2" ;;
        --rate)              RU_RATE="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "Unknown arg: $1" >&2
      exit 2
      ;;
  esac
done

if [[ -z "$ACCOUNT" ]]; then
  echo "ERROR: set COSMOS_ACCOUNT_NAME or pass -a" >&2
  exit 2
fi
if [[ -z "$RG" ]]; then
  echo "ERROR: set COSMOS_RESOURCE_GROUP or pass -g" >&2
  exit 2
fi
command -v az >/dev/null || { echo "ERROR: az CLI not found" >&2; exit 2; }
command -v python3 >/dev/null || { echo "ERROR: python3 not found" >&2; exit 2; }

# The monthly projection divides by DAYS, so it must be a positive integer
# (10# keeps values such as "08" from being read as octal); TOP and DRILL are
# row counts.
if [[ ! "$DAYS" =~ ^[0-9]+$ ]] || (( 10#$DAYS < 1 )); then
  echo "ERROR: --days must be a positive integer (got '$DAYS')" >&2
  exit 2
fi
if [[ ! "$TOP" =~ ^[0-9]+$ ]]; then
  echo "ERROR: --top must be a non-negative integer (got '$TOP')" >&2
  exit 2
fi
if [[ ! "$DRILL" =~ ^[0-9]+$ ]]; then
  echo "ERROR: --drill must be a non-negative integer (got '$DRILL')" >&2
  exit 2
fi

# RU metrics window in UTC. datetime.now(timezone.utc) replaces the deprecated
# utcnow(), which prints a DeprecationWarning on Python 3.12+.
START="$(python3 -c 'import datetime,sys;print((datetime.datetime.now(datetime.timezone.utc)-datetime.timedelta(days=int(sys.argv[1]))).strftime("%Y-%m-%dT%H:%M:%SZ"))' "$DAYS")"
END="$(python3 -c 'import datetime;print(datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"))')"
echo "════════════════════════════════════════════════════════════════"
echo " Cosmos DB Cost Report"
echo " Account: $ACCOUNT Resource group: $RG"
# The window bounds are separated by an arrow; printed back to back the two
# timestamps run together and are unreadable.
echo " RU window: last ${DAYS}d ($START -> $END)"
echo "════════════════════════════════════════════════════════════════"

# Account facts: resource id (used by the metric queries), capability names
# (joined with commas) and the first listed region.
RID="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query id -o tsv)"
CAPS="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query "capabilities[].name" -o tsv | tr '\n' ',' || true)"
REGION="$(az cosmosdb show -n "$ACCOUNT" -g "$RG" --query "locations[0].locationName" -o tsv)"

# Serverless accounts carry the EnableServerless capability; anything else is
# reported as provisioned.
if [[ "$CAPS" == *EnableServerless* ]]; then
  MODE="SERVERLESS"
else
  MODE="PROVISIONED"
fi

echo ""
echo "▶ Billing mode : $MODE"
echo "▶ Region : $REGION"
echo "▶ Est. RU rate : \$$RU_RATE per 1M RU (serverless)"
# ── 1. Actual billed cost (Cost Management, last 30d) ─────────────
# Best-effort section: any failure prints a one-line notice and the report
# continues with the RU metrics.
echo ""
echo "── Actual billed cost — last 30d by service (resource group) ──"
SUB="$(az account show --query id -o tsv)"

# Whole-day UTC boundaries. datetime.now(timezone.utc) replaces the deprecated
# utcnow(), which prints a DeprecationWarning on Python 3.12+.
CM_FROM="$(python3 -c 'import datetime;print((datetime.datetime.now(datetime.timezone.utc)-datetime.timedelta(days=30)).strftime("%Y-%m-%dT00:00:00Z"))')"
CM_TO="$(python3 -c 'import datetime;print(datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT00:00:00Z"))')"

CM_BODY="$(mktemp)"
# Remove the request body even if the script aborts before the explicit rm below.
trap 'rm -f -- "$CM_BODY"' EXIT
cat >"$CM_BODY" <<JSON
{"type":"ActualCost","timeframe":"Custom","timePeriod":{"from":"$CM_FROM","to":"$CM_TO"},
"dataset":{"granularity":"None","aggregation":{"totalCost":{"name":"Cost","function":"Sum"}},
"grouping":[{"type":"Dimension","name":"ServiceName"}]}}
JSON

CM_UNAVAILABLE=" (cost query unavailable — continuing with RU metrics)"
# Run the query first and render only on success, so a failed call prints a
# single notice instead of "no cost data" followed by "unavailable".
if CM_ROWS="$(az rest --method post \
  --url "https://management.azure.com/subscriptions/$SUB/resourceGroups/$RG/providers/Microsoft.CostManagement/query?api-version=2023-11-01" \
  --body "@$CM_BODY" --headers "Content-Type=application/json" \
  --query "properties.rows" -o json 2>/dev/null)"; then
  python3 -c '
import json, sys

try:
    rows = json.load(sys.stdin) or []
except Exception:
    rows = []

# Each row is [cost, service name, currency]; anything else is ignored.
rows = [r for r in rows if isinstance(r, list) and len(r) >= 2]
rows.sort(key=lambda r: float(r[0]), reverse=True)

if not rows:
    print(" (no cost data — needs Cost Management reader on the subscription)")
for r in rows:
    print(" %-28s $%8.2f %s" % (str(r[1])[:28], float(r[0]), r[2] if len(r) > 2 else ""))
' <<<"$CM_ROWS" || echo "$CM_UNAVAILABLE"
else
  echo "$CM_UNAVAILABLE"
fi

rm -f -- "$CM_BODY"
trap - EXIT
#######################################
# Query the TotalRequestUnits metric split by a dimension and print the raw
# JSON array of {"k": <dimension value>, "ru": <summed RU>} objects. Ranking
# and the est. monthly $ column are added afterwards by render_ru.
# Globals:   RID, START, END (read)
# Arguments: $1 - metric filter, e.g. "DatabaseName eq '*'"
#            $2 - index into metadatavalues used as the row name (default 0)
# Outputs:   JSON on stdout; "[]" when the query fails or returns nothing.
#            A one-line warning goes to stderr on failure.
# Returns:   0 always. A failed query must not abort the report under
#            `set -e` / pipefail, where it would exit without any message
#            because az's own stderr is discarded.
# NOTE(review): both callers pass index 0, including the two-dimension
# container drill-down — confirm metadatavalues[0] is the collection name there.
#######################################
ru_table() {
  local filter="$1" dimidx="${2:-0}"
  local json=""

  if ! json="$(az monitor metrics list --resource "$RID" --metric TotalRequestUnits \
    --aggregation Total --interval P1D --start-time "$START" --end-time "$END" \
    --filter "$filter" --top 500 \
    --query "value[0].timeseries[].{k: metadatavalues[$dimidx].value, ru: sum(data[].total)}" \
    -o json 2>/dev/null)"; then
    echo "WARN: RU metrics query failed for filter: $filter" >&2
    json=""
  fi

  printf '%s\n' "${json:-[]}"
}
#######################################
# Render the JSON produced by ru_table as a ranked table with share and
# est. monthly $ columns, followed by a TOTAL row.
# Globals:   DAYS, RU_RATE, TOP (read)
# Inputs:    stdin - JSON array of {"k": name, "ru": number}; unparsable or
#            empty input is treated as no rows.
# Outputs:   the formatted table on stdout
# Returns:   python3's exit status (non-zero when DAYS is not positive)
#######################################
render_ru() {
  python3 -c '
import json, sys

days = float(sys.argv[1])
rate = float(sys.argv[2])
top = int(sys.argv[3])
if days <= 0:
    sys.exit("render_ru: DAYS must be greater than 0")

try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []

rows = []
for it in items:
    if not isinstance(it, dict):
        continue
    rows.append((it.get("k") or "<none>", float(it.get("ru") or 0)))
rows.sort(key=lambda x: x[1], reverse=True)

# Keep the real sum for display and use a separate non-zero divisor for the
# share column, so an empty table reports a total of 0 RU.
total = sum(r[1] for r in rows)
share_base = total or 1.0

print(" %-28s %16s %8s %10s" % ("name", "RU (" + str(int(days)) + "d)", "share", "est $/mo"))
print(" " + "-" * 68)
for name, ru in rows[:top]:
    # Linear projection of the window to 30 days, priced per 1M RU.
    est = ru / days * 30.0 / 1000000.0 * rate
    print(" %-28s %16s %7.1f%% %9.2f" % (name[:28], "{:,.0f}".format(ru), 100 * ru / share_base, est))
proj = total / days * 30.0 / 1000000.0 * rate
print(" " + "-" * 68)
print(" %-28s %16s %8s %9.2f" % ("TOTAL", "{:,.0f}".format(total), "", proj))
' "$DAYS" "$RU_RATE" "$TOP"
}
# ── 2. RU by database ─────────────────────────────────────────────
echo ""
echo "── RU consumption by database (product) — last ${DAYS}d ──"
# A failed query becomes an empty table instead of aborting under `set -e`.
DB_JSON="$(ru_table "DatabaseName eq '*'" 0)" || DB_JSON='[]'
render_ru <<<"$DB_JSON"

# ── 3. Drill into the top databases by container ──────────────────
# Pick the DRILL heaviest named databases that actually consumed RU. Unnamed
# and idle entries are dropped before the top-N slice, so they cannot use up
# drill slots.
TOP_DBS="$(python3 -c '
import json, sys

limit = int(sys.argv[1])
try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []

ranked = [(i.get("k") or "", float(i.get("ru") or 0)) for i in items if isinstance(i, dict)]
ranked = [entry for entry in ranked if entry[0] and entry[1] > 0]
ranked.sort(key=lambda entry: entry[1], reverse=True)
for name, _ in ranked[:limit]:
    print(name)
' "$DRILL" <<<"$DB_JSON")"

# Read one name per line (no word splitting or globbing on database names).
# The list is fed on fd 3 so commands inside the loop cannot consume it via stdin.
while IFS= read -r -u 3 DB; do
  [[ -n "$DB" ]] || continue
  echo ""
  echo "── RU by container in '$DB' — last ${DAYS}d ──"
  # A single quote inside a filter literal is escaped by doubling it.
  DB_FILTER_NAME="${DB//"'"/"''"}"
  ru_table "DatabaseName eq '$DB_FILTER_NAME' and CollectionName eq '*'" 0 | render_ru \
    || echo "WARN: container drill-down failed for '$DB'" >&2
done 3<<<"$TOP_DBS"
# ── 4. Storage by database ────────────────────────────────────────
# Lists the largest DataUsage value seen per database inside the RU window,
# biggest first, converted from bytes to MB.
echo ""
echo "── Storage (DataUsage) by database — latest snapshot ──"
# Capture first: with pipefail a failed az call in a pipeline would abort the
# script right after "(no storage data)" and the closing notes would be lost.
STORE_JSON="$(az monitor metrics list --resource "$RID" --metric DataUsage \
  --aggregation Total --interval PT1H --start-time "$START" --end-time "$END" \
  --filter "DatabaseName eq '*'" --top 200 \
  --query "value[0].timeseries[].{k: metadatavalues[0].value, b: max(data[].total)}" \
  -o json 2>/dev/null)" || STORE_JSON='[]'
python3 -c '
import json, sys

try:
    items = json.load(sys.stdin) or []
except Exception:
    items = []

rows = [(i.get("k") or "<none>", float(i.get("b") or 0)) for i in items if isinstance(i, dict)]
rows.sort(key=lambda x: x[1], reverse=True)
for name, size in rows:
    print(" %-28s %10.2f MB" % (name[:28], size / 1024 / 1024))
if not rows:
    print(" (no storage data)")
' <<<"$STORE_JSON"
# Closing notes: how to read the estimates printed above. Emitted as a single
# here-document; \$ keeps the dollar signs literal while RU_RATE and DAYS expand.
cat <<NOTES

════════════════════════════════════════════════════════════════
 Notes:
 - Serverless cost ≈ RU consumed × \$$RU_RATE/1M + storage(\$~0.25/GB-mo).
 - 'est \$/mo' linearly projects the ${DAYS}d window to 30 days.
 - High RU + low request count ⇒ expensive per-op (cross-partition
 queries / large docs) — prime rightsizing target.
 - A *_locks container burning RU is usually lock-polling overhead.
════════════════════════════════════════════════════════════════
NOTES