-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathprometheus_alerts.yaml
90 lines (90 loc) · 3.82 KB
/
prometheus_alerts.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Prometheus alerting rules for Argo CD applications and notification delivery.
#
# Every selector below carries job=~".*", which matches any job value; tighten
# it to scope the alerts to specific Argo CD instances.
#
# NOTE(review): the dashboard_url annotations point at https://grafana.com,
# presumably a placeholder base URL. Confirm the real Grafana host.
groups:
  - name: "argo-cd"
    rules:
      # Fires per application when argocd_app_info has reported a sync_status
      # other than "Synced" continuously for 15m.
      - alert: "ArgoCdAppOutOfSync"
        annotations:
          dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}"
          description: "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past 15m."
          summary: "An ArgoCD Application is Out Of Sync."
        expr: |
          sum(
            argocd_app_info{
              job=~".*",
              sync_status!="Synced"
            }
          ) by (job, dest_server, project, name, sync_status)
          > 0
        for: "15m"
        labels:
          severity: "warning"

      # Fires per application when health_status has been neither "Healthy"
      # nor "Progressing" continuously for 15m.
      - alert: "ArgoCdAppUnhealthy"
        annotations:
          dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}"
          description: "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is unhealthy with the health status {{ $labels.health_status }} for the past 15m."
          summary: "An ArgoCD Application is Unhealthy."
        expr: |
          sum(
            argocd_app_info{
              job=~".*",
              health_status!~"Healthy|Progressing"
            }
          ) by (job, dest_server, project, name, health_status)
          > 0
        for: "15m"
        labels:
          severity: "warning"

      # Fires per application when autosync_enabled has been anything other
      # than "true" for 2h. The name!~"" matcher drops series whose name label
      # is empty or absent.
      - alert: "ArgoCdAppAutoSyncDisabled"
        annotations:
          dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}"
          description: "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has autosync disabled for the past 2h."
          summary: "An ArgoCD Application has AutoSync Disabled."
        expr: |
          sum(
            argocd_app_info{
              job=~".*",
              autosync_enabled!="true",
              name!~""
            }
          ) by (job, dest_server, project, name, autosync_enabled)
          > 0
        for: "2h"
        labels:
          severity: "warning"

      # Fires per application and phase when the rounded 10m increase of
      # argocd_app_sync_total, for phases other than "Succeeded", is above
      # zero and stays so for 1m. round() turns the extrapolated (possibly
      # fractional) increase into a whole number before the comparison.
      - alert: "ArgoCdAppSyncFailed"
        annotations:
          dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}"
          description: "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has failed to sync with the status {{ $labels.phase }} in the past 10m."
          summary: "An ArgoCD Application has Failed to Sync."
        expr: |
          sum(
            round(
              increase(
                argocd_app_sync_total{
                  job=~".*",
                  phase!="Succeeded"
                }[10m]
              )
            )
          ) by (job, dest_server, project, name, phase) > 0
        for: "1m"
        labels:
          severity: "warning"

      # Fires per job and exported_service when the rounded 10m increase of
      # argocd_notifications_deliveries_total, for deliveries whose succeeded
      # label is not "true", is above zero and stays so for 1m.
      - alert: "ArgoCdNotificationDeliveryFailed"
        annotations:
          dashboard_url: "https://grafana.com/d/argo-cd-notifications-overview-kask/argocd-notifications-overview?var-job={{ $labels.job }}&var-exported_service={{ $labels.exported_service }}"
          description: "The notification job {{ $labels.job }} has failed to deliver to {{ $labels.exported_service }} for the past 10m."
          summary: "ArgoCD Notification Delivery Failed."
        expr: |
          sum(
            round(
              increase(
                argocd_notifications_deliveries_total{
                  job=~".*",
                  succeeded!="true"
                }[10m]
              )
            )
          ) by (job, exported_service, succeeded) > 0
        for: "1m"
        labels:
          severity: "warning"