Alerts

/etc/prometheus/alerts.rules.yml > major_alerting_rules
CPU (2 active)
# CPU: fires when an instance's CPU utilisation, averaged across all of its
# cores over the last 5 minutes, is at or above 90%.
# Utilisation is computed as 100 minus the average idle percentage, so the
# value stays within 0-100 regardless of how many cores the host has.
# Summing the per-core mode="system" rates (the previous form) measures
# "percent of one core" and can exceed 100 on hosts with several cores.
# NOTE(review): this counts every non-idle mode rather than only
# mode="system" - confirm that total utilisation is the intended signal.
alert: CPU
expr: >-
  100 - (avg by(instance, job) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
  >= 90
labels:
  severity: major
annotations:
  cluster: '{{ $labels.job | toUpper }}'
  description: has high CPU usage ({{ $value | printf "%.2f" }}%) over last 5 minutes.
  summary: CPU average
Labels State Active Since Value
alertname="CPU" instance="b2b-db-main.tct.travel:9100" job="db" severity="major" firing 2026-04-06 03:09:29.882319426 +0000 UTC 105.67017543856707
alertname="CPU" instance="dbext3.tct.travel:9100" job="db" severity="major" firing 2026-04-06 10:01:29.882319426 +0000 UTC 122.44561403497576
Load Average (12 active)
# Load Average: fires when the 15-minute load average of an instance is at or
# above its number of CPU cores.
# The right-hand side counts the mode="idle" series per instance/job to obtain
# the core count. Because this is a filtering comparison, the resulting sample
# keeps the raw node_load15 value, so $value in the description is still the
# load average itself.
# A fixed threshold of 1 ignores the core count and fires on multi-core hosts
# that are only lightly loaded.
# NOTE(review): assumes node_cpu_seconds_total exposes exactly one
# mode="idle" series per core for the same instance/job - confirm against the
# exporter in use.
alert: Load Average
expr: >-
  node_load15
  >= on(instance, job) group_left()
  count by(instance, job) (node_cpu_seconds_total{mode="idle"})
labels:
  severity: major
annotations:
  cluster: '{{ $labels.job | toUpper }}'
  description: has high load average ({{ $value }}) over last 15 minutes.
  summary: high load average
Labels State Active Since Value
alertname="Load Average" instance="extranet3.tct.travel:9100" job="extranet" severity="major" firing 2026-04-06 05:14:14.882319426 +0000 UTC 10.35
alertname="Load Average" instance="live5.tct.travel:9100" job="php" severity="major" firing 2026-04-06 10:22:14.882319426 +0000 UTC 1.01
alertname="Load Average" instance="alexandria-b2b-db-main.tct.travel:9100" job="db" severity="major" firing 2026-04-06 08:44:59.882319426 +0000 UTC 1.41
alertname="Load Average" instance="dbext3.tct.travel:9100" job="db" severity="major" firing 2026-04-06 05:36:14.882319426 +0000 UTC 4.65
alertname="Load Average" instance="b2c-db-main.tct.travel:9100" job="db" severity="major" firing 2026-04-06 00:38:29.882319426 +0000 UTC 1.6
alertname="Load Average" instance="dbtb3.tct.travel:9100" job="db" severity="major" firing 2026-04-06 09:54:14.882319426 +0000 UTC 1.01
alertname="Load Average" instance="b2b-db-local-2.tct.travel:9100" job="db" severity="major" firing 2026-04-06 08:47:29.882319426 +0000 UTC 1.02
alertname="Load Average" instance="live6.tct.travel:9100" job="php" severity="major" firing 2026-04-06 10:21:14.882319426 +0000 UTC 1.02
alertname="Load Average" instance="b2b-db-main.tct.travel:9100" job="db" severity="major" firing 2026-01-28 23:41:29.882319426 +0000 UTC 5.68
alertname="Load Average" instance="dbl6.tct.travel:9100" job="db" severity="major" firing 2026-04-06 08:13:44.882319426 +0000 UTC 1.89
alertname="Load Average" instance="dbla1.tct.travel:9100" job="db" severity="major" firing 2026-04-06 08:58:29.882319426 +0000 UTC 1.12
alertname="Load Average" instance="b2b-db-local-1.tct.travel:9100" job="db" severity="major" firing 2026-04-06 07:06:44.882319426 +0000 UTC 2.92
Space Usage (2 active)
# Space Usage: root filesystem usage alert.
# The expression turns the available-bytes / size-bytes ratio of the
# filesystem mounted at "/" into a used percentage and fires at 85 or above.
alert: Space Usage
expr: >-
  100 - ((node_filesystem_avail_bytes{mountpoint="/"} * 100)
  / node_filesystem_size_bytes{mountpoint="/"}) >= 85
labels:
  severity: major
annotations:
  cluster: "{{ $labels.job | toUpper }}"
  description: 'has high / usage ({{ $value | printf "%.2f" }}%).'
  summary: Space Usage
Labels State Active Since Value
alertname="Space Usage" device="/dev/md2" fstype="ext4" instance="dbext3.tct.travel:9100" job="db" mountpoint="/" severity="major" firing 2026-04-01 17:51:14.882319426 +0000 UTC 92.32252048706435
alertname="Space Usage" device="/dev/md2" fstype="ext4" instance="extranet.tct.travel:9100" job="extranet" mountpoint="/" severity="major" firing 2026-03-31 16:37:14.882319426 +0000 UTC 86.48362793532002
Used RAM Memory (0 active)
# Used RAM Memory: memory usage alert.
# The expression turns the MemAvailable / MemTotal ratio into a used
# percentage and fires at 75 or above.
alert: Used RAM Memory
expr: >-
  100 - ((node_memory_MemAvailable_bytes * 100) / node_memory_MemTotal_bytes)
  >= 75
labels:
  severity: major
annotations:
  cluster: "{{ $labels.job | toUpper }}"
  description: 'has high RAM usage ({{ $value | printf "%.2f" }}%).'
  summary: Used RAM Memory