Alerts

/etc/prometheus/alerts.rules.yml > major_alerting_rules
CPU (1 active)
# Fires when an instance's overall CPU utilisation, averaged across all of
# its cores over the last 5 minutes, is 90% or higher.
alert: CPU
# Utilisation = 100 - mean per-core idle percentage. Averaging across cores
# (instead of summing one mode) keeps the value within 0-100 regardless of
# core count; a per-core sum can exceed 100 on multi-core hosts, which makes
# a ">= 90" percentage threshold meaningless. Using the idle mode also counts
# every kind of busy time rather than kernel ("system") time only.
expr: >-
  100 - (avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
  >= 90
labels:
  severity: major
annotations:
  cluster: '{{ $labels.job | toUpper }}'
  # $value is a true percentage now, so it carries a "%" suffix.
  description: >-
    has high CPU usage ({{ $value | printf "%.2f" }}%) over last 5 minutes.
  summary: CPU average
Labels State Active Since Value
alertname="CPU" instance="b2b-db-main.tct.travel:9100" job="db" severity="major" firing 2026-02-05 05:53:29.882319426 +0000 UTC 171.64561403519068
Load Average (16 active)
# Fires when the 15-minute load average of an instance reaches one or more
# per CPU core.
alert: Load Average
# node_load15 is an absolute figure, so comparing it with a fixed threshold of
# 1 flags any multi-core host under light load. It is divided by the number
# of cores on the same instance/job (counted through the per-core idle
# series) so that the ">= 1" threshold means "at least as much load as cores".
expr: >-
  node_load15
  / on (instance, job)
  count by (instance, job) (node_cpu_seconds_total{mode="idle"})
  >= 1
labels:
  severity: major
annotations:
  cluster: '{{ $labels.job | toUpper }}'
  # $value is the per-core load after normalisation, not the raw load average.
  description: >-
    has high load average per CPU core ({{ $value | printf "%.2f" }}) over last 15 minutes.
  summary: high load average
Labels State Active Since Value
alertname="Load Average" instance="tb4.tct.travel:9100" job="php" severity="major" firing 2026-02-05 08:46:29.882319426 +0000 UTC 6.65
alertname="Load Average" instance="b2b-db-local-1.tct.travel:9100" job="db" severity="major" firing 2026-02-05 06:01:44.882319426 +0000 UTC 15.31
alertname="Load Average" instance="tb3.tct.travel:9100" job="php" severity="major" firing 2026-02-05 08:20:59.882319426 +0000 UTC 10.12
alertname="Load Average" instance="stage-b2b.tct.travel:9100" job="php" severity="major" firing 2026-01-01 12:18:59.882319426 +0000 UTC 303.18
alertname="Load Average" instance="dbtb5.tct.travel:9100" job="db" severity="major" firing 2026-02-05 08:53:59.882319426 +0000 UTC 2.51
alertname="Load Average" instance="extranet3.tct.travel:9100" job="extranet" severity="major" firing 2026-02-05 07:55:44.882319426 +0000 UTC 3.62
alertname="Load Average" instance="alexandria-b2b-db-main.tct.travel:9100" job="db" severity="major" firing 2026-02-05 07:29:44.882319426 +0000 UTC 4.12
alertname="Load Average" instance="dbext3.tct.travel:9100" job="db" severity="major" firing 2026-02-05 06:18:14.882319426 +0000 UTC 27.56
alertname="Load Average" instance="b2c-db-main.tct.travel:9100" job="db" severity="major" firing 2026-02-05 01:11:59.882319426 +0000 UTC 3.03
alertname="Load Average" instance="dbtb3.tct.travel:9100" job="db" severity="major" firing 2026-02-05 09:51:59.882319426 +0000 UTC 1.19
alertname="Load Average" instance="tb5.tct.travel:9100" job="php" severity="major" firing 2026-02-05 08:29:44.882319426 +0000 UTC 42.19
alertname="Load Average" instance="b2b-db-main.tct.travel:9100" job="db" severity="major" firing 2026-01-28 23:41:29.882319426 +0000 UTC 6.54
alertname="Load Average" instance="b2b-db-local-2.tct.travel:9100" job="db" severity="major" firing 2026-02-05 08:09:14.882319426 +0000 UTC 2.57
alertname="Load Average" instance="dbl6.tct.travel:9100" job="db" severity="major" firing 2026-02-05 04:14:29.882319426 +0000 UTC 2
alertname="Load Average" instance="tb6.tct.travel:9100" job="php" severity="major" firing 2026-02-05 08:20:44.882319426 +0000 UTC 10.03
alertname="Load Average" instance="dbla1.tct.travel:9100" job="db" severity="major" firing 2026-02-04 19:30:44.882319426 +0000 UTC 2.34
Space Usage (0 active)
# Fires when the filesystem mounted at "/" is at least 85% used.
alert: Space Usage
# Used percentage is computed as 100 minus the available percentage
# (available bytes * 100 / total bytes) of the "/" mount point.
expr: >-
  100 -
  ((node_filesystem_avail_bytes{mountpoint="/"} * 100)
  / node_filesystem_size_bytes{mountpoint="/"})
  >= 85
labels:
  severity: major
annotations:
  # Upper-cased scrape job name of the affected instance.
  cluster: '{{ $labels.job | toUpper }}'
  # The measured percentage is rendered with two decimals.
  description: 'has high / usage ({{ $value | printf "%.2f" }}%).'
  summary: Space Usage
Used RAM Memory (0 active)
# Fires when at least 75% of an instance's memory is in use.
alert: Used RAM Memory
# Used percentage is computed as 100 minus the available percentage
# (MemAvailable * 100 / MemTotal).
expr: >-
  100 -
  ((node_memory_MemAvailable_bytes * 100) / node_memory_MemTotal_bytes)
  >= 75
labels:
  severity: major
annotations:
  # Upper-cased scrape job name of the affected instance.
  cluster: '{{ $labels.job | toUpper }}'
  # The measured percentage is rendered with two decimals.
  description: 'has high RAM usage ({{ $value | printf "%.2f" }}%).'
  summary: Used RAM Memory