Multiple configuration includes with a single "!include" (#732).

This commit is contained in:
Dan Helfman 2023-08-01 14:12:35 -07:00
parent 69771fe7ce
commit f6f06551f0
7 changed files with 241 additions and 131 deletions

1
NEWS
View file

@ -8,6 +8,7 @@
* #728: Fix for "prune" action error when using the "keep_exclude_tags" option.
* #730: Fix for Borg's interactive prompt on the "check --repair" action automatically getting
answered "NO" even when the "check_i_know_what_i_am_doing" option isn't set.
* #732: Include multiple configuration files with a single "!include".
1.8.0
* #575: BREAKING: For the "borgmatic borg" action, instead of implicitly injecting

View file

@ -331,9 +331,8 @@ def make_parsers():
global_plus_action_parser = ArgumentParser(
description='''
Simple, configuration-driven backup software for servers and workstations. If none of
the action options are given, then borgmatic defaults to: create, prune, compact, and
check.
Simple, configuration-driven backup software for servers and workstations. If no actions
are given, then borgmatic defaults to: create, prune, compact, and check.
''',
parents=[global_parser],
)

View file

@ -1,6 +1,8 @@
import functools
import itertools
import json
import logging
import operator
import os
import ruamel.yaml
@ -8,34 +10,61 @@ import ruamel.yaml
logger = logging.getLogger(__name__)
def probe_and_include_file(filename, include_directories):
'''
Given a filename to include and a list of include directories to search for matching files,
probe for the file, load it, and return the loaded configuration as a data structure of nested
dicts, lists, etc.
Raise FileNotFoundError if the included file was not found.
'''
expanded_filename = os.path.expanduser(filename)
if os.path.isabs(expanded_filename):
return load_configuration(expanded_filename)
candidate_filenames = {
os.path.join(directory, expanded_filename) for directory in include_directories
}
for candidate_filename in candidate_filenames:
if os.path.exists(candidate_filename):
return load_configuration(candidate_filename)
raise FileNotFoundError(
f'Could not find include {filename} at {" or ".join(candidate_filenames)}'
)
def include_configuration(loader, filename_node, include_directory):
'''
Given a ruamel.yaml.loader.Loader, a ruamel.yaml.serializer.ScalarNode containing the included
filename, and an include directory path to search for matching files, load the given YAML
filename (ignoring the given loader so we can use our own) and return its contents as a data
structure of nested dicts and lists. If the filename is relative, probe for it within 1. the
current working directory and 2. the given include directory.
Given a ruamel.yaml.loader.Loader, a ruamel.yaml.nodes.ScalarNode containing the included
filename (or a list containing multiple such filenames), and an include directory path to search
for matching files, load the given YAML filenames (ignoring the given loader so we can use our
own) and return their contents as data structure of nested dicts, lists, etc. If the given
filename node's value is a scalar string, then the return value will be a single value. But if
the given node value is a list, then the return value will be a list of values, one per loaded
configuration file.
If a filename is relative, probe for it within 1. the current working directory and 2. the given
include directory.
Raise FileNotFoundError if an included file was not found.
'''
include_directories = [os.getcwd(), os.path.abspath(include_directory)]
include_filename = os.path.expanduser(filename_node.value)
if not os.path.isabs(include_filename):
candidate_filenames = [
os.path.join(directory, include_filename) for directory in include_directories
if isinstance(filename_node.value, str):
return probe_and_include_file(filename_node.value, include_directories)
if isinstance(filename_node.value, list):
return [
probe_and_include_file(node.value, include_directories)
for node in reversed(filename_node.value)
]
for candidate_filename in candidate_filenames:
if os.path.exists(candidate_filename):
include_filename = candidate_filename
break
else:
raise FileNotFoundError(
f'Could not find include {filename_node.value} at {" or ".join(candidate_filenames)}'
)
return load_configuration(include_filename)
raise ValueError(
f'!include value type ({type(filename_node.value)}) is not supported; use a single filename or a list of filenames'
)
def raise_retain_node_error(loader, node):
@ -53,7 +82,7 @@ def raise_retain_node_error(loader, node):
'The !retain tag may only be used within a configuration file containing a merged !include tag.'
)
raise ValueError('The !retain tag may only be used on a YAML mapping or sequence.')
raise ValueError('The !retain tag may only be used on a mapping or list.')
def raise_omit_node_error(loader, node):
@ -65,14 +94,14 @@ def raise_omit_node_error(loader, node):
tags are handled by deep_merge_nodes() below.
'''
raise ValueError(
'The !omit tag may only be used on a scalar (e.g., string) list element within a configuration file containing a merged !include tag.'
'The !omit tag may only be used on a scalar (e.g., string) or list element within a configuration file containing a merged !include tag.'
)
class Include_constructor(ruamel.yaml.SafeConstructor):
'''
A YAML "constructor" (a ruamel.yaml concept) that supports a custom "!include" tag for including
separate YAML configuration files. Example syntax: `retention: !include common.yaml`
separate YAML configuration files. Example syntax: `option: !include common.yaml`
'''
def __init__(self, preserve_quotes=None, loader=None, include_directory=None):
@ -81,6 +110,9 @@ class Include_constructor(ruamel.yaml.SafeConstructor):
'!include',
functools.partial(include_configuration, include_directory=include_directory),
)
# These are catch-all error handlers for tags that don't get applied and removed by
# deep_merge_nodes() below.
self.add_constructor('!retain', raise_retain_node_error)
self.add_constructor('!omit', raise_omit_node_error)
@ -90,8 +122,8 @@ class Include_constructor(ruamel.yaml.SafeConstructor):
using the YAML '<<' merge key. Example syntax:
```
retention:
keep_daily: 1
option:
sub_option: 1
<<: !include common.yaml
```
@ -104,9 +136,15 @@ class Include_constructor(ruamel.yaml.SafeConstructor):
for index, (key_node, value_node) in enumerate(node.value):
if key_node.tag == u'tag:yaml.org,2002:merge' and value_node.tag == '!include':
included_value = representer.represent_data(self.construct_object(value_node))
node.value[index] = (key_node, included_value)
# Replace the merge include with a sequence of included configuration nodes ready
# for merging. The construct_object() call here triggers include_configuration()
# among other constructors.
node.value[index] = (
key_node,
representer.represent_data(self.construct_object(value_node)),
)
# This super().flatten_mapping() call actually performs "<<" merges.
super(Include_constructor, self).flatten_mapping(node)
node.value = deep_merge_nodes(node.value)
@ -138,7 +176,12 @@ def load_configuration(filename):
file_contents = file.read()
config = yaml.load(file_contents)
if config and 'constants' in config:
try:
has_constants = bool(config and 'constants' in config)
except TypeError:
has_constants = False
if has_constants:
for key, value in config['constants'].items():
value = json.dumps(value)
file_contents = file_contents.replace(f'{{{key}}}', value.strip('"'))
@ -149,53 +192,92 @@ def load_configuration(filename):
return config
def filter_omitted_nodes(nodes):
def filter_omitted_nodes(nodes, values):
'''
Given a list of nodes, return a filtered list omitting any nodes with an "!omit" tag or with a
value matching such nodes.
Given a nested borgmatic configuration data structure as a list of tuples in the form of:
[
(
ruamel.yaml.nodes.ScalarNode as a key,
ruamel.yaml.nodes.MappingNode or other Node as a value,
),
...
]
... and a combined list of all values for those nodes, return a filtered list of the values,
omitting any that have an "!omit" tag (or with a value matching such nodes).
But if only a single node is given, bail and return the given values unfiltered, as "!omit" only
applies when there are merge includes (and therefore multiple nodes).
'''
omitted_values = tuple(node.value for node in nodes if node.tag == '!omit')
if len(nodes) <= 1:
return values
return [node for node in nodes if node.value not in omitted_values]
omitted_values = tuple(node.value for node in values if node.tag == '!omit')
return [node for node in values if node.value not in omitted_values]
DELETED_NODE = object()
def merge_values(nodes):
'''
Given a nested borgmatic configuration data structure as a list of tuples in the form of:
[
(
ruamel.yaml.nodes.ScalarNode as a key,
ruamel.yaml.nodes.MappingNode or other Node as a value,
),
...
]
... merge its sequence or mapping node values and return the result. For sequence nodes, this
means appending together its contained lists. For mapping nodes, it means merging its contained
dicts.
'''
return functools.reduce(operator.add, (value.value for key, value in nodes))
def deep_merge_nodes(nodes):
'''
Given a nested borgmatic configuration data structure as a list of tuples in the form of:
[
(
ruamel.yaml.nodes.ScalarNode as a key,
ruamel.yaml.nodes.MappingNode or other Node as a value,
),
...
]
... deep merge any node values corresponding to duplicate keys and return the result. If
there are colliding keys with non-MappingNode values (e.g., integers or strings), the last
of the values wins.
... deep merge any node values corresponding to duplicate keys and return the result. The
purpose of merging like this is to support, for instance, merging one borgmatic configuration
file into another for reuse, such that a configuration option with sub-options does not
completely replace the corresponding option in a merged file.
If there are colliding keys with scalar values (e.g., integers or strings), the last of the
values wins.
For instance, given node values of:
[
(
ScalarNode(tag='tag:yaml.org,2002:str', value='retention'),
ScalarNode(tag='tag:yaml.org,2002:str', value='option'),
MappingNode(tag='tag:yaml.org,2002:map', value=[
(
ScalarNode(tag='tag:yaml.org,2002:str', value='keep_hourly'),
ScalarNode(tag='tag:yaml.org,2002:int', value='24')
ScalarNode(tag='tag:yaml.org,2002:str', value='sub_option1'),
ScalarNode(tag='tag:yaml.org,2002:int', value='1')
),
(
ScalarNode(tag='tag:yaml.org,2002:str', value='keep_daily'),
ScalarNode(tag='tag:yaml.org,2002:int', value='7')
ScalarNode(tag='tag:yaml.org,2002:str', value='sub_option2'),
ScalarNode(tag='tag:yaml.org,2002:int', value='2')
),
]),
),
(
ScalarNode(tag='tag:yaml.org,2002:str', value='retention'),
ScalarNode(tag='tag:yaml.org,2002:str', value='option'),
MappingNode(tag='tag:yaml.org,2002:map', value=[
(
ScalarNode(tag='tag:yaml.org,2002:str', value='keep_daily'),
ScalarNode(tag='tag:yaml.org,2002:str', value='sub_option2'),
ScalarNode(tag='tag:yaml.org,2002:int', value='5')
),
]),
@ -206,88 +288,95 @@ def deep_merge_nodes(nodes):
[
(
ScalarNode(tag='tag:yaml.org,2002:str', value='retention'),
ScalarNode(tag='tag:yaml.org,2002:str', value='option'),
MappingNode(tag='tag:yaml.org,2002:map', value=[
(
ScalarNode(tag='tag:yaml.org,2002:str', value='keep_hourly'),
ScalarNode(tag='tag:yaml.org,2002:int', value='24')
ScalarNode(tag='tag:yaml.org,2002:str', value='sub_option1'),
ScalarNode(tag='tag:yaml.org,2002:int', value='1')
),
(
ScalarNode(tag='tag:yaml.org,2002:str', value='keep_daily'),
ScalarNode(tag='tag:yaml.org,2002:str', value='sub_option2'),
ScalarNode(tag='tag:yaml.org,2002:int', value='5')
),
]),
),
]
This function supports multi-way merging, meaning that if the same option name exists three or
more times (at the same scope level), all of those instances get merged together.
If a mapping or sequence node has a YAML "!retain" tag, then that node is not merged.
The purpose of deep merging like this is to support, for instance, merging one borgmatic
configuration file into another for reuse, such that a configuration option with sub-options
does not completely replace the corresponding option in a merged file.
Raise ValueError if a merge is implied using two incompatible types.
Raise ValueError if a merge is implied using multiple incompatible types.
'''
# Map from original node key/value to the replacement merged node. DELETED_NODE as a replacement
# node indications deletion.
replaced_nodes = {}
merged_nodes = []
# To find nodes that require merging, compare each node with each other node.
for a_key, a_value in nodes:
for b_key, b_value in nodes:
# If we've already considered one of the nodes for merging, skip it.
if (a_key, a_value) in replaced_nodes or (b_key, b_value) in replaced_nodes:
continue
def get_node_key_name(node):
return node[0].value
# If the keys match and the values are different, we need to merge these two A and B nodes.
if a_key.tag == b_key.tag and a_key.value == b_key.value and a_value != b_value:
if not type(a_value) is type(b_value):
raise ValueError(
f'Incompatible types found when trying to merge "{a_key.value}:" values across configuration files: {type(a_value).id} and {type(b_value).id}'
# Bucket the nodes by their keys. Then merge all of the values sharing the same key.
for key_name, grouped_nodes in itertools.groupby(
sorted(nodes, key=get_node_key_name), get_node_key_name
):
grouped_nodes = list(grouped_nodes)
# The merged node inherits its attributes from the final node in the group.
(last_node_key, last_node_value) = grouped_nodes[-1]
value_types = set(type(value) for (_, value) in grouped_nodes)
if len(value_types) > 1:
raise ValueError(
f'Incompatible types found when trying to merge "{key_name}:" values across configuration files: {", ".join(value_type.id for value_type in value_types)}'
)
# If we're dealing with MappingNodes, recurse and merge its values as well.
if ruamel.yaml.nodes.MappingNode in value_types:
# A "!retain" tag says to skip deep merging for this node. Replace the tag so
# downstream schema validation doesn't break on our application-specific tag.
if last_node_value.tag == '!retain' and len(grouped_nodes) > 1:
last_node_value.tag = 'tag:yaml.org,2002:map'
merged_nodes.append((last_node_key, last_node_value))
else:
merged_nodes.append(
(
last_node_key,
ruamel.yaml.nodes.MappingNode(
tag=last_node_value.tag,
value=deep_merge_nodes(merge_values(grouped_nodes)),
start_mark=last_node_value.start_mark,
end_mark=last_node_value.end_mark,
flow_style=last_node_value.flow_style,
comment=last_node_value.comment,
anchor=last_node_value.anchor,
),
)
)
# Since we're merging into the B node, consider the A node a duplicate and remove it.
replaced_nodes[(a_key, a_value)] = DELETED_NODE
continue
# If we're dealing with MappingNodes, recurse and merge its values as well.
if isinstance(b_value, ruamel.yaml.nodes.MappingNode):
# A "!retain" tag says to skip deep merging for this node. Replace the tag so
# downstream schema validation doesn't break on our application-specific tag.
if b_value.tag == '!retain':
b_value.tag = 'tag:yaml.org,2002:map'
else:
replaced_nodes[(b_key, b_value)] = (
b_key,
ruamel.yaml.nodes.MappingNode(
tag=b_value.tag,
value=deep_merge_nodes(a_value.value + b_value.value),
start_mark=b_value.start_mark,
end_mark=b_value.end_mark,
flow_style=b_value.flow_style,
comment=b_value.comment,
anchor=b_value.anchor,
),
)
# If we're dealing with SequenceNodes, merge by appending one sequence to the other.
elif isinstance(b_value, ruamel.yaml.nodes.SequenceNode):
# A "!retain" tag says to skip deep merging for this node. Replace the tag so
# downstream schema validation doesn't break on our application-specific tag.
if b_value.tag == '!retain':
b_value.tag = 'tag:yaml.org,2002:seq'
else:
replaced_nodes[(b_key, b_value)] = (
b_key,
ruamel.yaml.nodes.SequenceNode(
tag=b_value.tag,
value=filter_omitted_nodes(a_value.value + b_value.value),
start_mark=b_value.start_mark,
end_mark=b_value.end_mark,
flow_style=b_value.flow_style,
comment=b_value.comment,
anchor=b_value.anchor,
),
)
# If we're dealing with SequenceNodes, merge by appending sequences together.
if ruamel.yaml.nodes.SequenceNode in value_types:
if last_node_value.tag == '!retain' and len(grouped_nodes) > 1:
last_node_value.tag = 'tag:yaml.org,2002:seq'
merged_nodes.append((last_node_key, last_node_value))
else:
merged_nodes.append(
(
last_node_key,
ruamel.yaml.nodes.SequenceNode(
tag=last_node_value.tag,
value=filter_omitted_nodes(grouped_nodes, merge_values(grouped_nodes)),
start_mark=last_node_value.start_mark,
end_mark=last_node_value.end_mark,
flow_style=last_node_value.flow_style,
comment=last_node_value.comment,
anchor=last_node_value.anchor,
),
)
)
return [
replaced_nodes.get(node, node) for node in nodes if replaced_nodes.get(node) != DELETED_NODE
]
continue
merged_nodes.append((last_node_key, last_node_value))
return merged_nodes

View file

@ -10,7 +10,11 @@ def normalize_sections(config_filename, config):
Raise ValueError if the "prefix" option is set in both "location" and "consistency" sections.
'''
location = config.get('location') or {}
try:
location = config.get('location') or {}
except AttributeError:
raise ValueError('Configuration does not contain any options')
storage = config.get('storage') or {}
consistency = config.get('consistency') or {}
hooks = config.get('hooks') or {}

View file

@ -309,8 +309,8 @@ source_directories:
- /etc
```
<span class="minilink minilink-addedin">Prior to version 1.8.0</span> Put
this option in the `location:` section of your configuration.
<span class="minilink minilink-addedin">Prior to version 1.8.0</span> Put the
`source_directories` option in the `location:` section of your configuration.
Once this include gets merged in, the resulting configuration will have a
`source_directories` value of `/etc` and `/var`—with `/home` omitted.

View file

@ -438,8 +438,9 @@ def test_raise_omit_node_error_raises():
module.raise_omit_node_error(loader=flexmock(), node=flexmock())
def test_filter_omitted_nodes():
nodes = [
def test_filter_omitted_nodes_discards_values_with_omit_tag_and_also_equal_values():
nodes = [flexmock(), flexmock()]
values = [
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='a'),
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='b'),
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='c'),
@ -448,11 +449,27 @@ def test_filter_omitted_nodes():
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='c'),
]
result = module.filter_omitted_nodes(nodes)
result = module.filter_omitted_nodes(nodes, values)
assert [item.value for item in result] == ['a', 'c', 'a', 'c']
def test_filter_omitted_nodes_keeps_all_values_when_given_only_one_node():
nodes = [flexmock()]
values = [
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='a'),
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='b'),
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='c'),
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='a'),
module.ruamel.yaml.nodes.ScalarNode(tag='!omit', value='b'),
module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='c'),
]
result = module.filter_omitted_nodes(nodes, values)
assert [item.value for item in result] == ['a', 'b', 'c', 'a', 'b', 'c']
def test_deep_merge_nodes_replaces_colliding_scalar_values():
node_values = [
(
@ -499,10 +516,10 @@ def test_deep_merge_nodes_replaces_colliding_scalar_values():
assert section_key.value == 'retention'
options = section_value.value
assert len(options) == 2
assert options[0][0].value == 'keep_hourly'
assert options[0][1].value == '24'
assert options[1][0].value == 'keep_daily'
assert options[1][1].value == '5'
assert options[0][0].value == 'keep_daily'
assert options[0][1].value == '5'
assert options[1][0].value == 'keep_hourly'
assert options[1][1].value == '24'
def test_deep_merge_nodes_keeps_non_colliding_scalar_values():
@ -553,10 +570,10 @@ def test_deep_merge_nodes_keeps_non_colliding_scalar_values():
assert section_key.value == 'retention'
options = section_value.value
assert len(options) == 3
assert options[0][0].value == 'keep_hourly'
assert options[0][1].value == '24'
assert options[1][0].value == 'keep_daily'
assert options[1][1].value == '7'
assert options[0][0].value == 'keep_daily'
assert options[0][1].value == '7'
assert options[1][0].value == 'keep_hourly'
assert options[1][1].value == '24'
assert options[2][0].value == 'keep_minutely'
assert options[2][1].value == '10'
@ -629,10 +646,10 @@ def test_deep_merge_nodes_keeps_deeply_nested_values():
assert section_key.value == 'storage'
options = section_value.value
assert len(options) == 2
assert options[0][0].value == 'lock_wait'
assert options[0][1].value == '5'
assert options[1][0].value == 'extra_borg_options'
nested_options = options[1][1].value
assert options[0][0].value == 'extra_borg_options'
assert options[1][0].value == 'lock_wait'
assert options[1][1].value == '5'
nested_options = options[0][1].value
assert len(nested_options) == 2
assert nested_options[0][0].value == 'init'
assert nested_options[0][1].value == '--init-option'

View file

@ -117,7 +117,7 @@ def test_parse_configuration_with_schema_lacking_examples_does_not_raise():
module.parse_configuration('/tmp/config.yaml', '/tmp/schema.yaml')
def test_parse_configuration_inlines_include():
def test_parse_configuration_inlines_include_inside_deprecated_section():
mock_config_and_schema(
'''
source_directories: