diff --git a/NEWS b/NEWS
index 151e0d6..1be3d09 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,6 @@
1.5.0
+ * #245: Monitor backups with PagerDuty hook integration. See the documentation for more
+ information: https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
* #255: Add per-action hooks: "before_prune", "after_prune", "before_check", and "after_check".
* #274: Add ~/.config/borgmatic.d as another configuration directory default.
* #277: Customize Healthchecks log level via borgmatic "--monitoring-verbosity" flag.
diff --git a/README.md b/README.md
index 16dc0ea..b2ba6e0 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,7 @@ borgmatic is powered by [Borg Backup](https://www.borgbackup.org/).
+
diff --git a/borgmatic/config/schema.yaml b/borgmatic/config/schema.yaml
index 3a09fdf..a228e7a 100644
--- a/borgmatic/config/schema.yaml
+++ b/borgmatic/config/schema.yaml
@@ -567,6 +567,15 @@ map:
for details.
example:
https://cronitor.link/d3x0c1
+ pagerduty:
+ type: str
+ desc: |
+ PagerDuty integration key used to notify PagerDuty when a backup errors. Create
+ an account at https://www.pagerduty.com/ if you'd like to use this service. See
+ https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
+ for details.
+ example:
+ a177cad45bd374409f78906a810a3074
cronhub:
type: str
desc: |
diff --git a/borgmatic/hooks/dispatch.py b/borgmatic/hooks/dispatch.py
index 206b0d1..6c05cad 100644
--- a/borgmatic/hooks/dispatch.py
+++ b/borgmatic/hooks/dispatch.py
@@ -1,6 +1,6 @@
import logging
-from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, postgresql
+from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, pagerduty, postgresql
logger = logging.getLogger(__name__)
@@ -8,6 +8,7 @@ HOOK_NAME_TO_MODULE = {
'healthchecks': healthchecks,
'cronitor': cronitor,
'cronhub': cronhub,
+ 'pagerduty': pagerduty,
'postgresql_databases': postgresql,
'mysql_databases': mysql,
}
diff --git a/borgmatic/hooks/monitor.py b/borgmatic/hooks/monitor.py
index aee2b8f..c4cf576 100644
--- a/borgmatic/hooks/monitor.py
+++ b/borgmatic/hooks/monitor.py
@@ -1,6 +1,6 @@
from enum import Enum
-MONITOR_HOOK_NAMES = ('healthchecks', 'cronitor', 'cronhub')
+MONITOR_HOOK_NAMES = ('healthchecks', 'cronitor', 'cronhub', 'pagerduty')
class State(Enum):
diff --git a/borgmatic/hooks/pagerduty.py b/borgmatic/hooks/pagerduty.py
new file mode 100644
index 0000000..0e613cc
--- /dev/null
+++ b/borgmatic/hooks/pagerduty.py
@@ -0,0 +1,62 @@
+import datetime
+import json
+import logging
+import platform
+
+import requests
+
+from borgmatic.hooks import monitor
+
+logger = logging.getLogger(__name__)
+
+EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'
+
+
+def ping_monitor(integration_key, config_filename, state, monitoring_log_level, dry_run):
+ '''
+ If this is an error state, create a PagerDuty event with the given integration key. Use the
+ given configuration filename in any log entries. If this is a dry run, then don't actually
+ create an event.
+ '''
+ if state != monitor.State.FAIL:
+ logger.debug(
+ '{}: Ignoring unsupported monitoring {} in PagerDuty hook'.format(
+ config_filename, state.name.lower()
+ )
+ )
+ return
+
+ dry_run_label = ' (dry run; not actually sending)' if dry_run else ''
+ logger.info('{}: Sending failure event to PagerDuty {}'.format(config_filename, dry_run_label))
+
+ if dry_run:
+ return
+
+ hostname = platform.node()
+ local_timestamp = (
+ datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).astimezone().isoformat()
+ )
+ payload = json.dumps(
+ {
+ 'routing_key': integration_key,
+ 'event_action': 'trigger',
+ 'payload': {
+ 'summary': 'backup failed on {}'.format(hostname),
+ 'severity': 'error',
+ 'source': hostname,
+ 'timestamp': local_timestamp,
+ 'component': 'borgmatic',
+ 'group': 'backups',
+ 'class': 'backup failure',
+ 'custom_details': {
+ 'hostname': hostname,
+ 'configuration filename': config_filename,
+ 'server time': local_timestamp,
+ },
+ },
+ }
+ )
+ logger.debug('{}: Using PagerDuty payload: {}'.format(config_filename, payload))
+
+ logging.getLogger('urllib3').setLevel(logging.ERROR)
+ requests.post(EVENTS_API_URL, data=payload.encode('utf-8'))
diff --git a/docs/how-to/monitor-your-backups.md b/docs/how-to/monitor-your-backups.md
index c56a151..064c408 100644
--- a/docs/how-to/monitor-your-backups.md
+++ b/docs/how-to/monitor-your-backups.md
@@ -28,14 +28,15 @@ hooks](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#error-hoo
below for how to configure this.
4. **borgmatic monitoring hooks**: This feature integrates with monitoring
services like [Healthchecks](https://healthchecks.io/),
-[Cronitor](https://cronitor.io), and [Cronhub](https://cronhub.io), and pings
-these services whenever borgmatic runs. That way, you'll receive an alert when
-something goes wrong or the service doesn't hear from borgmatic for a
-configured interval. See
-[Healthchecks
+[Cronitor](https://cronitor.io), [Cronhub](https://cronhub.io), and
+[PagerDuty](https://www.pagerduty.com/), and pings these services whenever
+borgmatic runs. That way, you'll receive an alert when something goes wrong or
+(for certain hooks) the service doesn't hear from borgmatic for a configured
+interval. See [Healthchecks
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#healthchecks-hook), [Cronitor
-hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-hook), and [Cronhub
-hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronhub-hook)
+hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-hook), [Cronhub
+hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronhub-hook), and
+[PagerDuty hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook)
below for how to configure this.
3. **Third-party monitoring software**: You can use traditional monitoring
software to consume borgmatic JSON output and track when the last
@@ -200,6 +201,32 @@ mechanisms](https://docs.cronhub.io/integrations.html) when backups fail
or it doesn't hear from borgmatic for a certain period of time.
+## PagerDuty hook
+
+[PagerDuty](https://www.pagerduty.com/) provides incident monitoring and alerting,
+and borgmatic has built-in integration with it. Once you create a PagerDuty
+account and service
+on their site, all you need to do is configure borgmatic with the unique
+"Integration Key" for your service. Here's an example:
+
+
+```yaml
+hooks:
+ pagerduty: a177cad45bd374409f78906a810a3074
+```
+
+With this hook in place, borgmatic creates a PagerDuty event for your service
+whenever backups fail. Specifically, if an error occurs during a `create`,
+`prune`, or `check` action, borgmatic sends an event to PagerDuty after the
+`on_error` hooks run. Note that borgmatic does not contact PagerDuty when a
+backup starts or ends without error.
+
+You can configure PagerDuty to notify you by a [variety of
+mechanisms](https://support.pagerduty.com/docs/notifications) when backups
+fail.
+
+
## Scripting borgmatic
To consume the output of borgmatic in other software, you can include an
diff --git a/docs/static/pagerduty.png b/docs/static/pagerduty.png
new file mode 100644
index 0000000..c60c63e
Binary files /dev/null and b/docs/static/pagerduty.png differ
diff --git a/tests/unit/hooks/test_pagerduty.py b/tests/unit/hooks/test_pagerduty.py
new file mode 100644
index 0000000..76c5451
--- /dev/null
+++ b/tests/unit/hooks/test_pagerduty.py
@@ -0,0 +1,35 @@
+from flexmock import flexmock
+
+from borgmatic.hooks import pagerduty as module
+
+
+def test_ping_monitor_ignores_start_state():
+ flexmock(module.requests).should_receive('post').never()
+
+ module.ping_monitor(
+ 'abc123', 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=False
+ )
+
+
+def test_ping_monitor_ignores_finish_state():
+ flexmock(module.requests).should_receive('post').never()
+
+ module.ping_monitor(
+ 'abc123', 'config.yaml', module.monitor.State.FINISH, monitoring_log_level=1, dry_run=False
+ )
+
+
+def test_ping_monitor_calls_api_for_fail_state():
+ flexmock(module.requests).should_receive('post')
+
+ module.ping_monitor(
+ 'abc123', 'config.yaml', module.monitor.State.FAIL, monitoring_log_level=1, dry_run=False
+ )
+
+
+def test_ping_monitor_dry_run_does_not_call_api():
+ flexmock(module.requests).should_receive('post').never()
+
+ module.ping_monitor(
+ 'abc123', 'config.yaml', module.monitor.State.FAIL, monitoring_log_level=1, dry_run=True
+ )