Optionally dump "all" PostgreSQL databases to separate files instead of one combined dump file (#438, #560).

This commit is contained in:
Dan Helfman 2022-12-15 22:59:42 -08:00
parent 437fd4dbae
commit f67c544be6
6 changed files with 221 additions and 70 deletions

3
NEWS
View file

@ -1,4 +1,7 @@
1.7.6.dev0
* #438, #560: Optionally dump "all" PostgreSQL databases to separate files instead of one combined
dump file, allowing more convenient restores of individual databases. You can enable this by
specifying the database dump "format" option when the database is named "all".
* #602: Fix logs that interfere with JSON output by making warnings go to stderr instead of stdout.
1.7.5

View file

@ -691,10 +691,13 @@ properties:
type: string
description: |
Database name (required if using this hook). Or
"all" to dump all databases on the host. Note
that using this database hook implicitly enables
both read_special and one_file_system (see
above) to support dump and restore streaming.
"all" to dump all databases on the host. (Also
set the "format" to dump each database to a
separate file instead of one combined file.)
Note that using this database hook implicitly
enables both read_special and one_file_system
(see above) to support dump and restore
streaming.
example: users
hostname:
type: string
@ -729,9 +732,14 @@ properties:
description: |
Database dump output format. One of "plain",
"custom", "directory", or "tar". Defaults to
"custom" (unlike raw pg_dump). See pg_dump
documentation for details. Note that format is
ignored when the database name is "all".
"custom" (unlike raw pg_dump) for a single
database. Or, when database name is "all" and
format is blank, dumps all databases to a single
file. But if a format is specified with an "all"
database name, dumps each database to a separate
file of that format, allowing more convenient
restores of individual databases. See the
pg_dump documentation for more about formats.
example: directory
ssl_mode:
type: string

View file

@ -24,7 +24,7 @@ SYSTEM_DATABASE_NAMES = ('information_schema', 'mysql', 'performance_schema', 's
def database_names_to_dump(database, extra_environment, log_prefix, dry_run_label):
'''
Given a requested database name, return the corresponding sequence of database names to dump.
Given a requested database config, return the corresponding sequence of database names to dump.
In the case of "all", query for the names of databases on the configured host and return them,
excluding any system databases that will cause problems during restore.
'''

View file

@ -1,6 +1,11 @@
import csv
import logging
from borgmatic.execute import execute_command, execute_command_with_processes
from borgmatic.execute import (
execute_command,
execute_command_and_capture_output,
execute_command_with_processes,
)
from borgmatic.hooks import dump
logger = logging.getLogger(__name__)
@ -34,6 +39,44 @@ def make_extra_environment(database):
return extra
EXCLUDED_DATABASE_NAMES = ('template0', 'template1')
def database_names_to_dump(database, extra_environment, log_prefix, dry_run_label):
'''
Given a requested database config, return the corresponding sequence of database names to dump.
In the case of "all" when a database format is given, query for the names of databases on the
configured host and return them. For "all" without a database format, just return a sequence
containing "all".
'''
requested_name = database['name']
if requested_name != 'all':
return (requested_name,)
if not database.get('format'):
return ('all',)
list_command = (
('psql', '--list', '--no-password', '--csv', '--tuples-only')
+ (('--host', database['hostname']) if 'hostname' in database else ())
+ (('--port', str(database['port'])) if 'port' in database else ())
+ (('--username', database['username']) if 'username' in database else ())
+ (tuple(database['options'].split(' ')) if 'options' in database else ())
)
logger.debug(
'{}: Querying for "all" PostgreSQL databases to dump{}'.format(log_prefix, dry_run_label)
)
list_output = execute_command_and_capture_output(
list_command, extra_environment=extra_environment
)
return tuple(
row[0]
for row in csv.reader(list_output.splitlines(), delimiter=',', quotechar='"')
if row[0] not in EXCLUDED_DATABASE_NAMES
)
def dump_databases(databases, log_prefix, location_config, dry_run):
'''
Dump the given PostgreSQL databases to a named pipe. The databases are supplied as a sequence of
@ -43,6 +86,8 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
Raise ValueError if the databases to dump cannot be determined.
'''
dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
processes = []
@ -50,48 +95,59 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
logger.info('{}: Dumping PostgreSQL databases{}'.format(log_prefix, dry_run_label))
for database in databases:
name = database['name']
dump_filename = dump.make_database_dump_filename(
make_dump_path(location_config), name, database.get('hostname')
)
all_databases = bool(name == 'all')
dump_format = database.get('format', 'custom')
default_dump_command = 'pg_dumpall' if all_databases else 'pg_dump'
dump_command = database.get('pg_dump_command') or default_dump_command
command = (
(dump_command, '--no-password', '--clean', '--if-exists',)
+ (('--host', database['hostname']) if 'hostname' in database else ())
+ (('--port', str(database['port'])) if 'port' in database else ())
+ (('--username', database['username']) if 'username' in database else ())
+ (() if all_databases else ('--format', dump_format))
+ (('--file', dump_filename) if dump_format == 'directory' else ())
+ (tuple(database['options'].split(' ')) if 'options' in database else ())
+ (() if all_databases else (name,))
# Use shell redirection rather than the --file flag to sidestep synchronization issues
# when pg_dump/pg_dumpall tries to write to a named pipe. But for the directory dump
# format in a particular, a named destination is required, and redirection doesn't work.
+ (('>', dump_filename) if dump_format != 'directory' else ())
)
extra_environment = make_extra_environment(database)
logger.debug(
'{}: Dumping PostgreSQL database {} to {}{}'.format(
log_prefix, name, dump_filename, dry_run_label
)
dump_path = make_dump_path(location_config)
dump_database_names = database_names_to_dump(
database, extra_environment, log_prefix, dry_run_label
)
if dry_run:
continue
if dump_format == 'directory':
dump.create_parent_directory_for_dump(dump_filename)
else:
dump.create_named_pipe_for_dump(dump_filename)
if not dump_database_names:
raise ValueError('Cannot find any PostgreSQL databases to dump.')
processes.append(
execute_command(
command, shell=True, extra_environment=extra_environment, run_to_completion=False
for database_name in dump_database_names:
dump_format = database.get('format', None if database_name == 'all' else 'custom')
default_dump_command = 'pg_dumpall' if database_name == 'all' else 'pg_dump'
dump_command = database.get('pg_dump_command') or default_dump_command
dump_filename = dump.make_database_dump_filename(
dump_path, database_name, database.get('hostname')
)
command = (
(dump_command, '--no-password', '--clean', '--if-exists',)
+ (('--host', database['hostname']) if 'hostname' in database else ())
+ (('--port', str(database['port'])) if 'port' in database else ())
+ (('--username', database['username']) if 'username' in database else ())
+ (('--format', dump_format) if dump_format else ())
+ (('--file', dump_filename) if dump_format == 'directory' else ())
+ (tuple(database['options'].split(' ')) if 'options' in database else ())
+ (() if database_name == 'all' else (database_name,))
# Use shell redirection rather than the --file flag to sidestep synchronization issues
# when pg_dump/pg_dumpall tries to write to a named pipe. But for the directory dump
# format in a particular, a named destination is required, and redirection doesn't work.
+ (('>', dump_filename) if dump_format != 'directory' else ())
)
logger.debug(
'{}: Dumping PostgreSQL database "{}" to {}{}'.format(
log_prefix, database_name, dump_filename, dry_run_label
)
)
if dry_run:
continue
if dump_format == 'directory':
dump.create_parent_directory_for_dump(dump_filename)
else:
dump.create_named_pipe_for_dump(dump_filename)
processes.append(
execute_command(
command,
shell=True,
extra_environment=extra_environment,
run_to_completion=False,
)
)
)
return processes

View file

@ -76,6 +76,9 @@ hooks:
options: "--ssl"
```
### All databases
If you want to dump all databases on a host, use `all` for the database name:
```yaml
@ -91,9 +94,30 @@ hooks:
Note that you may need to use a `username` of the `postgres` superuser for
this to work with PostgreSQL.
If you would like to backup databases only and not source directories, you can
specify an empty `source_directories` value (as it is a mandatory field prior
to borgmatic 1.7.1):
<span class="minilink minilink-addedin">New in version 1.7.6</span> With
PostgreSQL and MySQL, you can optionally dump "all" databases to separate
files instead of one combined dump file, allowing more convenient restores of
individual databases. Enable this by specifying your desired database dump
`format`:
```yaml
hooks:
postgresql_databases:
- name: all
format: custom
mysql_databases:
- name: all
format: sql
```
### No source directories
<span class="minilink minilink-addedin">New in version 1.7.1</span> If you
would like to backup databases only and not source directories, you can omit
`source_directories` entirely.
In older versions of borgmatic, instead specify an empty `source_directories`
value, as it is a mandatory option prior to version 1.7.1:
```yaml
location:
@ -103,8 +127,7 @@ hooks:
- name: all
```
<span class="minilink minilink-addedin">New in version 1.7.1</span> You can
omit `source_directories` entirely.
### External passwords

View file

@ -6,15 +6,57 @@ from flexmock import flexmock
from borgmatic.hooks import postgresql as module
def test_database_names_to_dump_passes_through_individual_database_name():
database = {'name': 'foo'}
assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('foo',)
def test_database_names_to_dump_passes_through_individual_database_name_with_format():
database = {'name': 'foo', 'format': 'custom'}
assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('foo',)
def test_database_names_to_dump_passes_through_all_without_format():
database = {'name': 'all'}
assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('all',)
def test_database_names_to_dump_with_all_and_format_lists_databases():
database = {'name': 'all', 'format': 'custom'}
flexmock(module).should_receive('execute_command_and_capture_output').and_return(
'foo,test,\nbar,test,"stuff and such"'
)
assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == (
'foo',
'bar',
)
def test_database_names_to_dump_with_all_and_format_excludes_particular_databases():
database = {'name': 'all', 'format': 'custom'}
flexmock(module).should_receive('execute_command_and_capture_output').and_return(
'foo,test,\ntemplate0,test,blah'
)
assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('foo',)
def test_dump_databases_runs_pg_dump_for_each_database():
databases = [{'name': 'foo'}, {'name': 'bar'}]
processes = [flexmock(), flexmock()]
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)).and_return(
('bar',)
)
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
).and_return('databases/localhost/bar')
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
for name, process in zip(('foo', 'bar'), processes):
flexmock(module).should_receive('execute_command').with_args(
@ -37,14 +79,27 @@ def test_dump_databases_runs_pg_dump_for_each_database():
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == processes
def test_dump_databases_runs_raises_when_no_database_names_to_dump():
databases = [{'name': 'foo'}, {'name': 'bar'}]
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(())
with pytest.raises(ValueError):
module.dump_databases(databases, 'test.yaml', {}, dry_run=False)
def test_dump_databases_with_dry_run_skips_pg_dump():
databases = [{'name': 'foo'}, {'name': 'bar'}]
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)).and_return(
('bar',)
)
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
).and_return('databases/localhost/bar')
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command').never()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=True) == []
@ -53,12 +108,13 @@ def test_dump_databases_with_dry_run_skips_pg_dump():
def test_dump_databases_runs_pg_dump_with_hostname_and_port():
databases = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
process = flexmock()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/database.example.org/foo'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command').with_args(
(
@ -87,14 +143,15 @@ def test_dump_databases_runs_pg_dump_with_hostname_and_port():
def test_dump_databases_runs_pg_dump_with_username_and_password():
databases = [{'name': 'foo', 'username': 'postgres', 'password': 'trustsome1'}]
process = flexmock()
flexmock(module).should_receive('make_extra_environment').and_return(
{'PGPASSWORD': 'trustsome1', 'PGSSLMODE': 'disable'}
)
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('make_extra_environment').and_return(
{'PGPASSWORD': 'trustsome1', 'PGSSLMODE': 'disable'}
)
flexmock(module).should_receive('execute_command').with_args(
(
@ -144,13 +201,14 @@ def test_make_extra_environment_maps_options_to_environment():
def test_dump_databases_runs_pg_dump_with_directory_format():
databases = [{'name': 'foo', 'format': 'directory'}]
process = flexmock()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.dump).should_receive('create_parent_directory_for_dump')
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command').with_args(
(
@ -175,12 +233,13 @@ def test_dump_databases_runs_pg_dump_with_directory_format():
def test_dump_databases_runs_pg_dump_with_options():
databases = [{'name': 'foo', 'options': '--stuff=such'}]
process = flexmock()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command').with_args(
(
@ -206,12 +265,13 @@ def test_dump_databases_runs_pg_dump_with_options():
def test_dump_databases_runs_pg_dumpall_for_all_databases():
databases = [{'name': 'all'}]
process = flexmock()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('all',))
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/all'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command').with_args(
('pg_dumpall', '--no-password', '--clean', '--if-exists', '>', 'databases/localhost/all'),
@ -226,12 +286,13 @@ def test_dump_databases_runs_pg_dumpall_for_all_databases():
def test_dump_databases_runs_non_default_pg_dump():
databases = [{'name': 'foo', 'pg_dump_command': 'special_pg_dump'}]
process = flexmock()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command').with_args(
(
@ -257,9 +318,9 @@ def test_restore_database_dump_runs_pg_restore():
database_config = [{'name': 'foo'}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command_with_processes').with_args(
(
'pg_restore',
@ -288,9 +349,9 @@ def test_restore_database_dump_runs_pg_restore():
def test_restore_database_dump_errors_on_multiple_database_config():
database_config = [{'name': 'foo'}, {'name': 'bar'}]
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command_with_processes').never()
flexmock(module).should_receive('execute_command').never()
@ -304,9 +365,9 @@ def test_restore_database_dump_runs_pg_restore_with_hostname_and_port():
database_config = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command_with_processes').with_args(
(
'pg_restore',
@ -352,11 +413,11 @@ def test_restore_database_dump_runs_pg_restore_with_username_and_password():
database_config = [{'name': 'foo', 'username': 'postgres', 'password': 'trustsome1'}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('make_extra_environment').and_return(
{'PGPASSWORD': 'trustsome1', 'PGSSLMODE': 'disable'}
)
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('execute_command_with_processes').with_args(
(
'pg_restore',
@ -398,9 +459,9 @@ def test_restore_database_dump_runs_psql_for_all_database_dump():
database_config = [{'name': 'all'}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command_with_processes').with_args(
('psql', '--no-password'),
processes=[extract_process],
@ -424,9 +485,9 @@ def test_restore_database_dump_runs_non_default_pg_restore_and_psql():
]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command_with_processes').with_args(
(
'special_pg_restore',
@ -455,9 +516,9 @@ def test_restore_database_dump_runs_non_default_pg_restore_and_psql():
def test_restore_database_dump_with_dry_run_skips_restore():
database_config = [{'name': 'foo'}]
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command_with_processes').never()
module.restore_database_dump(
@ -468,9 +529,9 @@ def test_restore_database_dump_with_dry_run_skips_restore():
def test_restore_database_dump_without_extract_process_restores_from_disk():
database_config = [{'name': 'foo'}]
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('execute_command_with_processes').with_args(
(
'pg_restore',