Allow before_backup and similar hooks to exit with a soft failure without altering the monitoring status (#292).

2020-06-02 14:33:41 -07:00 · 2020-06-02 14:33:41 -07:00 · 398665be9e
commit 398665be9e
parent 6db232d4ac
7 changed files with 82 additions and 44 deletions
--- a/4
+++ b/4
@ -1,4 +1,8 @@
 1.5.6.dev0
+ * #292: Allow before_backup and similar hooks to exit with a soft failure without altering the
+   monitoring status on Healthchecks or other providers. Support this by waiting to ping monitoring
+   services with a "start" status until after before_* hooks finish. Failures in before_* hooks
+   still trigger a monitoring "fail" status.
 * #316: Fix hang when a stale database dump named pipe from an aborted borgmatic run remains on
   disk.
 * Tweak comment indentation in generated configuration file for clarity.
--- a/borgmatic/commands/borgmatic.py
+++ b/borgmatic/commands/borgmatic.py
@ -59,11 +59,10 @@ def run_configuration(config_filename, config, arguments):
    try:
        if prune_create_or_check:
            dispatch.call_hooks(
-                'ping_monitor',
+                'initialize_monitor',
                hooks,
                config_filename,
                monitor.MONITOR_HOOK_NAMES,
-                monitor.State.START,
                monitoring_log_level,
                global_arguments.dry_run,
            )
@ -91,6 +90,16 @@ def run_configuration(config_filename, config, arguments):
                'pre-check',
                global_arguments.dry_run,
            )
+        if prune_create_or_check:
+            dispatch.call_hooks(
+                'ping_monitor',
+                hooks,
+                config_filename,
+                monitor.MONITOR_HOOK_NAMES,
+                monitor.State.START,
+                monitoring_log_level,
+                global_arguments.dry_run,
+            )
    except (OSError, CalledProcessError) as error:
        if command.considered_soft_failure(config_filename, error):
            return
@ -123,6 +132,16 @@ def run_configuration(config_filename, config, arguments):

    if not encountered_error:
        try:
+            if prune_create_or_check:
+                dispatch.call_hooks(
+                    'ping_monitor',
+                    hooks,
+                    config_filename,
+                    monitor.MONITOR_HOOK_NAMES,
+                    monitor.State.FINISH,
+                    monitoring_log_level,
+                    global_arguments.dry_run,
+                )
            if 'prune' in arguments:
                command.execute_hook(
                    hooks.get('after_prune'),
@ -155,16 +174,6 @@ def run_configuration(config_filename, config, arguments):
                    'post-check',
                    global_arguments.dry_run,
                )
-            if {'prune', 'create', 'check'}.intersection(arguments):
-                dispatch.call_hooks(
-                    'ping_monitor',
-                    hooks,
-                    config_filename,
-                    monitor.MONITOR_HOOK_NAMES,
-                    monitor.State.FINISH,
-                    monitoring_log_level,
-                    global_arguments.dry_run,
-                )
        except (OSError, CalledProcessError) as error:
            if command.considered_soft_failure(config_filename, error):
                return
@ -176,6 +185,15 @@ def run_configuration(config_filename, config, arguments):

    if encountered_error and prune_create_or_check:
        try:
+            dispatch.call_hooks(
+                'ping_monitor',
+                hooks,
+                config_filename,
+                monitor.MONITOR_HOOK_NAMES,
+                monitor.State.FAIL,
+                monitoring_log_level,
+                global_arguments.dry_run,
+            )
            command.execute_hook(
                hooks.get('on_error'),
                hooks.get('umask'),
@ -186,15 +204,6 @@ def run_configuration(config_filename, config, arguments):
                error=encountered_error,
                output=getattr(encountered_error, 'output', ''),
            )
-            dispatch.call_hooks(
-                'ping_monitor',
-                hooks,
-                config_filename,
-                monitor.MONITOR_HOOK_NAMES,
-                monitor.State.FAIL,
-                monitoring_log_level,
-                global_arguments.dry_run,
-            )
        except (OSError, CalledProcessError) as error:
            if command.considered_soft_failure(config_filename, error):
                return
--- a/borgmatic/hooks/cronhub.py
+++ b/borgmatic/hooks/cronhub.py
@ -13,6 +13,13 @@ MONITOR_STATE_TO_CRONHUB = {
 }


+def initialize_monitor(ping_url, config_filename, monitoring_log_level, dry_run):
+    '''
+    No initialization is necessary for this monitor.
+    '''
+    pass
+
+
 def ping_monitor(ping_url, config_filename, state, monitoring_log_level, dry_run):
    '''
    Ping the given Cronhub URL, modified with the monitor.State. Use the given configuration
--- a/borgmatic/hooks/cronitor.py
+++ b/borgmatic/hooks/cronitor.py
@ -13,6 +13,13 @@ MONITOR_STATE_TO_CRONITOR = {
 }


+def initialize_monitor(ping_url, config_filename, monitoring_log_level, dry_run):
+    '''
+    No initialization is necessary for this monitor.
+    '''
+    pass
+
+
 def ping_monitor(ping_url, config_filename, state, monitoring_log_level, dry_run):
    '''
    Ping the given Cronitor URL, modified with the monitor.State. Use the given configuration
--- a/borgmatic/hooks/healthchecks.py
+++ b/borgmatic/hooks/healthchecks.py
@ -65,20 +65,22 @@ def format_buffered_logs_for_payload():
    return payload


+def initialize_monitor(ping_url_or_uuid, config_filename, monitoring_log_level, dry_run):
+    '''
+    Add a handler to the root logger that stores in memory the most recent logs emitted. That
+    way, we can send them all to Healthchecks upon a finish or failure state.
+    '''
+    logging.getLogger().addHandler(
+        Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES, monitoring_log_level)
+    )
+
+
 def ping_monitor(ping_url_or_uuid, config_filename, state, monitoring_log_level, dry_run):
    '''
    Ping the given Healthchecks URL or UUID, modified with the monitor.State. Use the given
    configuration filename in any log entries, and log to Healthchecks with the giving log level.
    If this is a dry run, then don't actually ping anything.
    '''
-    if state is monitor.State.START:
-        # Add a handler to the root logger that stores in memory the most recent logs emitted. That
-        # way, we can send them all to Healthchecks upon a finish or failure state.
-        logging.getLogger().addHandler(
-            Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES, monitoring_log_level)
-        )
-        payload = ''
-
    ping_url = (
        ping_url_or_uuid
        if ping_url_or_uuid.startswith('http')
@ -97,6 +99,8 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, monitoring_log_level,

    if state in (monitor.State.FINISH, monitor.State.FAIL):
        payload = format_buffered_logs_for_payload()
+    else:
+        payload = ''

    if not dry_run:
        logging.getLogger('urllib3').setLevel(logging.ERROR)
--- a/borgmatic/hooks/pagerduty.py
+++ b/borgmatic/hooks/pagerduty.py
@ -12,6 +12,13 @@ logger = logging.getLogger(__name__)
 EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'


+def initialize_monitor(integration_key, config_filename, monitoring_log_level, dry_run):
+    '''
+    No initialization is necessary for this monitor.
+    '''
+    pass
+
+
 def ping_monitor(integration_key, config_filename, state, monitoring_log_level, dry_run):
    '''
    If this is an error state, create a PagerDuty event with the given integration key. Use the
--- a/docs/how-to/monitor-your-backups.md
+++ b/docs/how-to/monitor-your-backups.md
@ -117,21 +117,21 @@ hooks:
 ```

 With this hook in place, borgmatic pings your Healthchecks project when a
-backup begins, ends, or errors. Specifically, before the <a
+backup begins, ends, or errors. Specifically, after the <a
 href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
 hooks</a> run, borgmatic lets Healthchecks know that it has started if any of
 the `prune`, `create`, or `check` actions are run.

 Then, if the actions complete successfully, borgmatic notifies Healthchecks of
-the success after the `after_backup` hooks run, and includes borgmatic logs in
+the success before the `after_backup` hooks run, and includes borgmatic logs in
 the payload data sent to Healthchecks. This means that borgmatic logs show up
 in the Healthchecks UI, although be aware that Healthchecks currently has a
 10-kilobyte limit for the logs in each ping.

-If an error occurs during any action, borgmatic notifies Healthchecks after
-the `on_error` hooks run, also tacking on logs including the error itself. But
-the logs are only included for errors that occur when a `prune`, `create`, or
-`check` action is run.
+If an error occurs during any action or hook, borgmatic notifies Healthchecks
+before the `on_error` hooks run, also tacking on logs including the error
+itself. But the logs are only included for errors that occur when a `prune`,
+`create`, or `check` action is run.

 You can customize the verbosity of the logs that are sent to Healthchecks with
 borgmatic's `--monitoring-verbosity` flag. The `--files` and `--stats` flags
@ -157,13 +157,13 @@ hooks:
 ```

 With this hook in place, borgmatic pings your Cronitor monitor when a backup
-begins, ends, or errors. Specifically, before the <a
+begins, ends, or errors. Specifically, after the <a
 href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
 hooks</a> run, borgmatic lets Cronitor know that it has started if any of the
 `prune`, `create`, or `check` actions are run. Then, if the actions complete
-successfully, borgmatic notifies Cronitor of the success after the
-`after_backup` hooks run. And if an error occurs during any action, borgmatic
-notifies Cronitor after the `on_error` hooks run.
+successfully, borgmatic notifies Cronitor of the success before the
+`after_backup` hooks run. And if an error occurs during any action or hook,
+borgmatic notifies Cronitor before the `on_error` hooks run.

 You can configure Cronitor to notify you by a [variety of
 mechanisms](https://cronitor.io/docs/cron-job-notifications) when backups fail
@ -185,13 +185,13 @@ hooks:
 ```

 With this hook in place, borgmatic pings your Cronhub monitor when a backup
-begins, ends, or errors. Specifically, before the <a
+begins, ends, or errors. Specifically, after the <a
 href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
 hooks</a> run, borgmatic lets Cronhub know that it has started if any of the
 `prune`, `create`, or `check` actions are run. Then, if the actions complete
-successfully, borgmatic notifies Cronhub of the success after the
-`after_backup` hooks run. And if an error occurs during any action, borgmatic
-notifies Cronhub after the `on_error` hooks run.
+successfully, borgmatic notifies Cronhub of the success before the
+`after_backup` hooks run. And if an error occurs during any action or hook,
+borgmatic notifies Cronhub before the `on_error` hooks run.

 Note that even though you configure borgmatic with the "start" variant of the
 ping URL, borgmatic substitutes the correct state into the URL when pinging
@ -228,7 +228,7 @@ hooks:

 With this hook in place, borgmatic creates a PagerDuty event for your service
 whenever backups fail. Specifically, if an error occurs during a `create`,
-`prune`, or `check` action, borgmatic sends an event to PagerDuty after the
+`prune`, or `check` action, borgmatic sends an event to PagerDuty before the
 `on_error` hooks run. Note that borgmatic does not contact PagerDuty when a
 backup starts or ends without error.