Coverage for python/lsst/daf/butler/core/config.py: 42%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Configuration control."""
26__all__ = ("Config", "ConfigSubset")
28import collections
29import copy
30import json
31import logging
32import pprint
33import os
34import yaml
35import sys
36from pathlib import Path
37from yaml.representer import Representer
38import io
39from typing import Any, Dict, List, Sequence, Optional, ClassVar, IO, Tuple, Union
41from lsst.utils import doImport
42from ._butlerUri import ButlerURI
# Let defaultdict instances be written to YAML as ordinary mappings.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)

# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Name of the PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

# Use the C-accelerated safe loader when this PyYAML build provides it;
# not all installations have the C library, so fall back to the pure-Python
# safe loader otherwise.
yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
61def _doUpdate(d, u):
62 if not isinstance(u, collections.abc.Mapping) or \ 62 ↛ 64line 62 didn't jump to line 64, because the condition on line 62 was never true
63 not isinstance(d, collections.abc.MutableMapping):
64 raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
65 for k, v in u.items():
66 if isinstance(v, collections.abc.Mapping):
67 d[k] = _doUpdate(d.get(k, {}), v)
68 else:
69 d[k] = v
70 return d
73def _checkNextItem(k, d, create, must_be_dict):
74 """See if k is in d and if it is return the new child."""
75 nextVal = None
76 isThere = False
77 if d is None: 77 ↛ 79line 77 didn't jump to line 79, because the condition on line 77 was never true
78 # We have gone past the end of the hierarchy
79 pass
80 elif not must_be_dict and isinstance(d, collections.abc.Sequence): 80 ↛ 85line 80 didn't jump to line 85, because the condition on line 80 was never true
81 # Check for Sequence first because for lists
82 # __contains__ checks whether value is found in list
83 # not whether the index exists in list. When we traverse
84 # the hierarchy we are interested in the index.
85 try:
86 nextVal = d[int(k)]
87 isThere = True
88 except IndexError:
89 pass
90 except ValueError:
91 isThere = k in d
92 elif k in d:
93 nextVal = d[k]
94 isThere = True
95 elif create: 95 ↛ 96line 95 didn't jump to line 96, because the condition on line 95 was never true
96 d[k] = {}
97 nextVal = d[k]
98 isThere = True
100 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # If this is a string and not a stream we may well lack a name.
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem.
            self._root = ButlerURI("no-file.yaml")
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value for an ``!include`` tagged node.

        Parameters
        ----------
        node : `yaml.Node`
            A scalar node naming one file, a sequence node naming several
            files, or a mapping node whose values name files.

        Returns
        -------
        result : `object`, `list` or `dict`
            The loaded content of the file for a scalar node, a list of
            loaded contents for a sequence node, or a dict with the same
            keys and loaded contents as values for a mapping node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        result: Union[List[Any], Dict[str, Any]]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            result = [self.extractFile(filename) for filename in self.construct_sequence(node)]
            return result

        if isinstance(node, yaml.MappingNode):
            result = {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}
            return result

        # Carry the diagnostic (and the position in the document) in the
        # exception itself so that callers catching it can report it.
        raise yaml.constructor.ConstructorError(
            None, None,
            f"unrecognised node type ({type(node).__name__}) in !include statement",
            node.start_mark,
        )

    def extractFile(self, filename):
        """Load the YAML file referred to by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            Explicit URI, or a location relative to the file that contains
            the directive.

        Returns
        -------
        content : `object`
            Whatever the YAML parser constructs from the included file.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name; the name
        # lets nested !include directives resolve relative to this file.
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        # This Loader derives from a PyYAML safe loader (see yamlLoader).
        return yaml.load(stream, Loader)
176class Config(collections.abc.MutableMapping):
177 r"""Implements a datatype that is used by `Butler` for configuration.
179 It is essentially a `dict` with key/value pairs, including nested dicts
180 (as values). In fact, it can be initialized with a `dict`.
181 This is explained next:
183 Config extends the `dict` api so that hierarchical values may be accessed
184 with delimited notation or as a tuple. If a string is given the delimiter
185 is picked up from the first character in that string. For example,
186 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
187 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
188 If the first character is alphanumeric, no delimiter will be used.
189 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
190 Unicode characters can be used as the delimiter for distinctiveness if
191 required.
193 If a key in the hierarchy starts with a non-alphanumeric character care
194 should be used to ensure that either the tuple interface is used or
195 a distinct delimiter is always given in string form.
197 Finally, the delimiter can be escaped if it is part of a key and also
198 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
199 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
200 always better to use a different delimiter in these cases.
202 Note that adding a multi-level key implicitly creates any nesting levels
203 that do not exist, but removing multi-level keys does not automatically
204 remove empty nesting levels. As a result:
206 >>> c = Config()
207 >>> c[".a.b"] = 1
208 >>> del c[".a.b"]
209 >>> c
210 Config({'a': {}})
212 Storage formats supported:
214 - yaml: read and write is supported.
215 - json: read and write is supported but no ``!include`` directive.
217 Parameters
218 ----------
219 other : `str` or `Config` or `dict` or `ButlerURI` or `pathlib.Path`
220 Other source of configuration, can be:
222 - (`str`, `ButlerURI` or `pathlib.Path`) Treated as a URI to a config
223 file. Must end with ".yaml" or ".json".
224 - (`Config`) Copies the other Config's values into this one.
225 - (`dict`) Copies the values from the dict into this Config.
227 If `None` is provided an empty `Config` will be created.
228 """
230 _D: str = "→"
231 """Default internal delimiter to use for components in the hierarchy when
232 constructing keys for external use (see `Config.names()`)."""
234 includeKey: ClassVar[str] = "includeConfigs"
235 """Key used to indicate that another config should be included at this
236 part of the hierarchy."""
238 resourcesPackage: str = "lsst.daf.butler"
239 """Package to search for default configuration data. The resources
240 themselves will be within a ``configs`` resource hierarchy."""
242 def __init__(self, other=None):
243 self._data: Dict[str, Any] = {}
244 self.configFile = None
246 if other is None:
247 return
249 if isinstance(other, Config):
250 self._data = copy.deepcopy(other._data)
251 self.configFile = other.configFile
252 elif isinstance(other, (dict, collections.abc.Mapping)):
253 # In most cases we have a dict, and it's more efficient
254 # to check for a dict instance before checking the generic mapping.
255 self.update(other)
256 elif isinstance(other, (str, ButlerURI, Path)): 256 ↛ 263line 256 didn't jump to line 263, because the condition on line 256 was never false
257 # if other is a string, assume it is a file path/URI
258 self.__initFromUri(other)
259 self._processExplicitIncludes()
260 else:
261 # if the config specified by other could not be recognized raise
262 # a runtime error.
263 raise RuntimeError(f"A Config could not be loaded from other: {other}")
265 def ppprint(self):
266 """Return config as formatted readable string.
268 Examples
269 --------
270 use: ``pdb> print(myConfigObject.ppprint())``
272 Returns
273 -------
274 s : `str`
275 A prettyprint formatted string representing the config
276 """
277 return pprint.pformat(self._data, indent=2, width=1)
279 def __repr__(self):
280 return f"{type(self).__name__}({self._data!r})"
282 def __str__(self):
283 return self.ppprint()
285 def __len__(self):
286 return len(self._data)
288 def __iter__(self):
289 return iter(self._data)
def copy(self):
    """Return a new instance of the same class constructed from this one."""
    cls = type(self)
    return cls(self)
294 @classmethod
295 def fromString(cls, string: str, format: str = "yaml") -> Config:
296 """Create a new Config instance from a serialized string.
298 Parameters
299 ----------
300 string : `str`
301 String containing content in specified format
302 format : `str`, optional
303 Format of the supplied string. Can be ``json`` or ``yaml``.
305 Returns
306 -------
307 c : `Config`
308 Newly-constructed Config.
309 """
310 if format == "yaml":
311 new_config = cls().__initFromYaml(string)
312 elif format == "json":
313 new_config = cls().__initFromJson(string)
314 else:
315 raise ValueError(f"Unexpected format of string: {format}")
316 new_config._processExplicitIncludes()
317 return new_config
@classmethod
def fromYaml(cls, string: str) -> Config:
    """Construct a new instance from a string of YAML.

    This is a shorthand for `fromString` with ``format="yaml"``.

    Parameters
    ----------
    string : `str`
        String containing content in YAML format

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    new_config = cls.fromString(string, format="yaml")
    return new_config
def __initFromUri(self, path: Union[str, ButlerURI, Path]) -> None:
    """Populate this config from a file given as a path or URI.

    Parameters
    ----------
    path : `str`, `ButlerURI` or `pathlib.Path`
        Path or a URI to a persisted config file.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is not supported and the location does
        not exist.
    RuntimeError
        Raised if the location exists but its extension is not supported.
    """
    uri = ButlerURI(path)
    ext = uri.getExtension()

    if ext not in (".yaml", ".json"):
        # This URI does not have a valid extension. It might be because
        # we ended up with a directory and not a file. Before we complain
        # about an extension, do an existence check. The (possibly
        # expensive) check is skipped for supported extensions because
        # reading the file will reveal soon enough that it is not there.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")

    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", uri.geturl())
        content = uri.read()
        # Use a stream so we can name it
        stream = io.BytesIO(content)
        stream.name = uri.geturl()
        self.__initFromYaml(stream)
    else:
        log.debug("Opening JSON config file: %s", uri.geturl())
        content = uri.read()
        self.__initFromJson(content)

    self.configFile = uri
def __initFromYaml(self, stream):
    """Replace the content of this config with parsed YAML.

    Parameters
    ----------
    stream: `IO` or `str`
        Stream to pass to the YAML loader. Accepts anything that
        `yaml.load` accepts. This can include a string as well as an
        IO stream.

    Returns
    -------
    self : `Config`
        This instance, to allow chaining.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    loaded = yaml.load(stream, Loader=Loader)
    # The parser returns None for an empty document; store an empty dict.
    self._data = {} if loaded is None else loaded
    return self
389 def __initFromJson(self, stream):
390 """Load a JSON config from any readable stream that contains one.
392 Parameters
393 ----------
394 stream: `IO` or `str`
395 Stream to pass to the JSON loader. This can include a string as
396 well as an IO stream.
398 Raises
399 ------
400 TypeError:
401 Raised if there is an error loading the content.
402 """
403 if isinstance(stream, (bytes, str)):
404 content = json.loads(stream)
405 else:
406 content = json.load(stream)
407 if content is None:
408 content = {}
409 self._data = content
410 return self
def _processExplicitIncludes(self):
    """Scan through the configuration searching for the special includes.

    Looks for ``includeConfigs`` directive and processes the includes.
    Each directive is removed from the config and replaced by the merged
    content of the files it names, with values already present at that
    point in the hierarchy taking precedence over the included ones.

    Raises
    ------
    RuntimeError
        Raised if ``configFile`` is set but is not a `ButlerURI`, or if a
        referenced include file can not be found.
    """
    # Search paths for config files
    searchPaths = [ButlerURI(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if not isinstance(self.configFile, ButlerURI):
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(self.configFile.dirname())

    for path in self.nameTuples():
        if path[-1] != self.includeKey:
            continue

        # Path components can be integer list indices, so convert each one
        # to a string before joining.
        log.debug("Processing file include directive at %s",
                  self._D + self._D.join(str(p) for p in path))
        basePath = path[:-1]

        # Extract the includes and then delete them from the config
        includes = self[path]
        del self[path]

        # Be consistent and convert to a list
        if not isinstance(includes, list):
            includes = [includes]

        # Read each file assuming it is a reference to a file
        # The file can be relative to config file or cwd
        # ConfigSubset search paths are not used
        subConfigs = []
        for fileName in includes:
            # Expand any shell variables -- this could be URI
            fileName = ButlerURI(os.path.expandvars(fileName), forceAbsolute=False)
            found = None
            if fileName.isabs():
                found = fileName
            else:
                # Every entry is examined (no early exit), so a match in a
                # later entry replaces a match in an earlier one.
                for searchDir in searchPaths:
                    if isinstance(searchDir, ButlerURI):
                        specific = searchDir.join(fileName.path)
                        # Remote resource check might be expensive
                        if specific.exists():
                            found = specific
                    else:
                        log.warning("Do not understand search path entry '%s' of type %s",
                                    searchDir, type(searchDir).__name__)
            if not found:
                raise RuntimeError(f"Unable to find referenced include file: {fileName}")

            # Read the referenced Config as a Config
            subConfigs.append(type(self)(found))

        if not subConfigs:
            # An empty include list names nothing to merge; the directive
            # itself has already been removed above.
            continue

        # Now we need to merge these sub configs with the current
        # information that was present in this node in the config
        # tree with precedence given to the explicit values
        newConfig = subConfigs.pop(0)
        for sc in subConfigs:
            newConfig.update(sc)

        # Explicit values take precedence
        if not basePath:
            # This is an include at the root config
            newConfig.update(self)
            # Replace the current config
            self._data = newConfig._data
        else:
            newConfig.update(self[basePath])
            # And reattach to the base config
            self[basePath] = newConfig
486 @staticmethod
487 def _splitIntoKeys(key):
488 r"""Split the argument for get/set/in into a hierarchical list.
490 Parameters
491 ----------
492 key : `str` or iterable
493 Argument given to get/set/in. If an iterable is provided it will
494 be converted to a list. If the first character of the string
495 is not an alphanumeric character then it will be used as the
496 delimiter for the purposes of splitting the remainder of the
497 string. If the delimiter is also in one of the keys then it
498 can be escaped using ``\``. There is no default delimiter.
500 Returns
501 -------
502 keys : `list`
503 Hierarchical keys as a `list`.
504 """
505 if isinstance(key, str):
506 if not key[0].isalnum(): 506 ↛ 507line 506 didn't jump to line 507, because the condition on line 506 was never true
507 d = key[0]
508 key = key[1:]
509 else:
510 return [key, ]
511 escaped = f"\\{d}"
512 temp = None
513 if escaped in key:
514 # Complain at the attempt to escape the escape
515 doubled = fr"\{escaped}"
516 if doubled in key:
517 raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
518 " is not yet supported.")
519 # Replace with a character that won't be in the string
520 temp = "\r"
521 if temp in key or d == temp:
522 raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
523 " delimiter if escaping the delimiter")
524 key = key.replace(escaped, temp)
525 hierarchy = key.split(d)
526 if temp:
527 hierarchy = [h.replace(temp, d) for h in hierarchy]
528 return hierarchy
529 elif isinstance(key, collections.abc.Iterable): 529 ↛ 533line 529 didn't jump to line 533, because the condition on line 529 was never false
530 return list(key)
531 else:
532 # Not sure what this is so try it anyway
533 return [key, ]
535 def _getKeyHierarchy(self, name):
536 """Retrieve the key hierarchy for accessing the Config.
538 Parameters
539 ----------
540 name : `str` or `tuple`
541 Delimited string or `tuple` of hierarchical keys.
543 Returns
544 -------
545 hierarchy : `list` of `str`
546 Hierarchy to use as a `list`. If the name is available directly
547 as a key in the Config it will be used regardless of the presence
548 of any nominal delimiter.
549 """
550 if name in self._data:
551 keys = [name, ]
552 else:
553 keys = self._splitIntoKeys(name)
554 return keys
def _findInHierarchy(self, keys, create=False):
    """Walk down the Config following a hierarchy of keys.

    Parameters
    ----------
    keys : `list` or `tuple`
        Keys to search in hierarchy.
    create : `bool`, optional
        If `True`, if a part of the hierarchy does not exist, insert an
        empty `dict` into the hierarchy.

    Returns
    -------
    hierarchy : `list`
        List of the value corresponding to each key in the supplied
        hierarchy. Only keys that exist in the hierarchy will have
        a value.
    complete : `bool`
        `True` if the full hierarchy exists and the final element
        in ``hierarchy`` is the value of relevant value.
    """
    node = self._data
    visited = []
    for depth, key in enumerate(keys):
        # The top level is always a dict so the sequence check is only
        # worth doing from the second level down.
        node, present = _checkNextItem(key, node, create, depth == 0)
        if not present:
            return visited, False
        visited.append(node)
    return visited, True
597 def __getitem__(self, name):
598 # Override the split for the simple case where there is an exact
599 # match. This allows `Config.items()` to work via a simple
600 # __iter__ implementation that returns top level keys of
601 # self._data.
603 # If the name matches a key in the top-level hierarchy, bypass
604 # all further cleverness.
605 found_directly = False
606 try:
607 data = self._data[name]
608 found_directly = True
609 except KeyError:
610 pass
612 if not found_directly: 612 ↛ 613line 612 didn't jump to line 613, because the condition on line 612 was never true
613 keys = self._getKeyHierarchy(name)
615 hierarchy, complete = self._findInHierarchy(keys)
616 if not complete:
617 raise KeyError(f"{name} not found")
618 data = hierarchy[-1]
620 # In most cases we have a dict, and it's more efficient
621 # to check for a dict instance before checking the generic mapping.
622 if isinstance(data, (dict, collections.abc.Mapping)):
623 data = Config(data)
624 # Ensure that child configs inherit the parent internal delimiter
625 if self._D != Config._D: 625 ↛ 626line 625 didn't jump to line 626, because the condition on line 625 was never true
626 data._D = self._D
627 return data
629 def __setitem__(self, name, value):
630 keys = self._getKeyHierarchy(name)
631 last = keys.pop()
632 if isinstance(value, Config):
633 value = copy.deepcopy(value._data)
635 hierarchy, complete = self._findInHierarchy(keys, create=True)
636 if hierarchy:
637 data = hierarchy[-1]
638 else:
639 data = self._data
641 try:
642 data[last] = value
643 except TypeError:
644 data[int(last)] = value
def __contains__(self, key):
    """Report whether the full key hierarchy exists in the config."""
    _, complete = self._findInHierarchy(self._getKeyHierarchy(key))
    return complete
651 def __delitem__(self, key):
652 keys = self._getKeyHierarchy(key)
653 last = keys.pop()
654 hierarchy, complete = self._findInHierarchy(keys)
655 if complete: 655 ↛ 662line 655 didn't jump to line 662, because the condition on line 655 was never false
656 if hierarchy: 656 ↛ 657line 656 didn't jump to line 657, because the condition on line 656 was never true
657 data = hierarchy[-1]
658 else:
659 data = self._data
660 del data[last]
661 else:
662 raise KeyError(f"{key} not found in Config")
def update(self, other):
    """Merge the content of another `Config` or `dict` into this one.

    Behaves like `dict.update()` except that nested dicts are merged
    key by key instead of being overwritten as a whole.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration:

    Examples
    --------
    >>> c = Config({"a": {"b": 1}})
    >>> c.update({"a": {"c": 2}})
    >>> c == {"a": {"b": 1, "c": 2}}
    True

    >>> foo = {"a": {"b": 1}}
    >>> foo.update({"a": {"c": 2}})
    >>> print(foo)
    {'a': {'c': 2}}
    """
    target = self._data
    _doUpdate(target, other)
689 def merge(self, other):
690 """Merge another Config into this one.
692 Like `Config.update()`, but will add keys & values from other that
693 DO NOT EXIST in self.
695 Keys and values that already exist in self will NOT be overwritten.
697 Parameters
698 ----------
699 other : `dict` or `Config`
700 Source of configuration:
701 """
702 if not isinstance(other, collections.abc.Mapping):
703 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
705 # Convert the supplied mapping to a Config for consistency
706 # This will do a deepcopy if it is already a Config
707 otherCopy = Config(other)
708 otherCopy.update(self)
709 self._data = otherCopy._data
711 def nameTuples(self, topLevelOnly=False):
712 """Get tuples representing the name hierarchies of all keys.
714 The tuples returned from this method are guaranteed to be usable
715 to access items in the configuration object.
717 Parameters
718 ----------
719 topLevelOnly : `bool`, optional
720 If False, the default, a full hierarchy of names is returned.
721 If True, only the top level are returned.
723 Returns
724 -------
725 names : `list` of `tuple` of `str`
726 List of all names present in the `Config` where each element
727 in the list is a `tuple` of strings representing the hierarchy.
728 """
729 if topLevelOnly: 729 ↛ 730line 729 didn't jump to line 730, because the condition on line 729 was never true
730 return list((k,) for k in self)
732 def getKeysAsTuples(d, keys, base):
733 if isinstance(d, collections.abc.Sequence):
734 theseKeys = range(len(d))
735 else:
736 theseKeys = d.keys()
737 for key in theseKeys:
738 val = d[key]
739 levelKey = base + (key,) if base is not None else (key,)
740 keys.append(levelKey)
741 if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \
742 and not isinstance(val, str):
743 getKeysAsTuples(val, keys, levelKey)
744 keys: List[Tuple[str, ...]] = []
745 getKeysAsTuples(self._data, keys, None)
746 return keys
748 def names(self, topLevelOnly=False, delimiter=None):
749 """Get a delimited name of all the keys in the hierarchy.
751 The values returned from this method are guaranteed to be usable
752 to access items in the configuration object.
754 Parameters
755 ----------
756 topLevelOnly : `bool`, optional
757 If False, the default, a full hierarchy of names is returned.
758 If True, only the top level are returned.
759 delimiter : `str`, optional
760 Delimiter to use when forming the keys. If the delimiter is
761 present in any of the keys, it will be escaped in the returned
762 names. If `None` given a delimiter will be automatically provided.
763 The delimiter can not be alphanumeric.
765 Returns
766 -------
767 names : `list` of `str`
768 List of all names present in the `Config`.
770 Notes
771 -----
772 This is different than the built-in method `dict.keys`, which will
773 return only the first level keys.
775 Raises
776 ------
777 ValueError:
778 The supplied delimiter is alphanumeric.
779 """
780 if topLevelOnly:
781 return list(self.keys())
783 # Get all the tuples of hierarchical keys
784 nameTuples = self.nameTuples()
786 if delimiter is not None and delimiter.isalnum():
787 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
789 if delimiter is None:
790 # Start with something, and ensure it does not need to be
791 # escaped (it is much easier to understand if not escaped)
792 delimiter = self._D
794 # Form big string for easy check of delimiter clash
795 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
797 # Try a delimiter and keep trying until we get something that
798 # works.
799 ntries = 0
800 while delimiter in combined:
801 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
802 ntries += 1
804 if ntries > 100:
805 raise ValueError(f"Unable to determine a delimiter for Config {self}")
807 # try another one
808 while True:
809 delimiter = chr(ord(delimiter)+1)
810 if not delimiter.isalnum():
811 break
813 log.debug("Using delimiter %r", delimiter)
815 # Form the keys, escaping the delimiter if necessary
816 strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
817 for k in nameTuples]
818 return strings
820 def asArray(self, name):
821 """Get a value as an array.
823 May contain one or more elements.
825 Parameters
826 ----------
827 name : `str`
828 Key to use to retrieve value.
830 Returns
831 -------
832 array : `collections.abc.Sequence`
833 The value corresponding to name, but guaranteed to be returned
834 as a list with at least one element. If the value is a
835 `~collections.abc.Sequence` (and not a `str`) the value itself
836 will be returned, else the value will be the first element.
837 """
838 val = self.get(name)
839 if isinstance(val, str):
840 val = [val]
841 elif not isinstance(val, collections.abc.Sequence):
842 val = [val]
843 return val
845 def __eq__(self, other):
846 if isinstance(other, Config):
847 other = other._data
848 return self._data == other
850 def __ne__(self, other):
851 if isinstance(other, Config):
852 other = other._data
853 return self._data != other
855 #######
856 # i/o #
858 def dump(self, output: Optional[IO] = None, format: str = "yaml") -> Optional[str]:
859 """Write the config to an output stream.
861 Parameters
862 ----------
863 output : `IO`, optional
864 The stream to use for output. If `None` the serialized content
865 will be returned.
866 format : `str`, optional
867 The format to use for the output. Can be "yaml" or "json".
869 Returns
870 -------
871 serialized : `str` or `None`
872 If a stream was given the stream will be used and the return
873 value will be `None`. If the stream was `None` the
874 serialization will be returned as a string.
875 """
876 if format == "yaml":
877 return yaml.safe_dump(self._data, output, default_flow_style=False)
878 elif format == "json":
879 if output is not None:
880 json.dump(self._data, output, ensure_ascii=False)
881 return None
882 else:
883 return json.dumps(self._data, ensure_ascii=False)
884 raise ValueError(f"Unsupported format for Config serialization: {format}")
def dumpToUri(self, uri: Union[ButlerURI, str], updateFile: bool = True,
              defaultFileName: str = "butler.yaml",
              overwrite: bool = True) -> None:
    """Serialize the config and write it to the given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri: `str` or `ButlerURI`
        URI of location where the Config will be written.
    updateFile : bool, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : `str`, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : bool, optional
        If True the configuration will be written even if it already
        exists at that location.
    """
    # Make local copy of URI or create new one
    target = ButlerURI(uri)

    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # The serialization format is the extension without its leading dot.
    fmt = target.getExtension()[1:].lower()

    serialized = self.dump(format=fmt)
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"
    target.write(serialized.encode(), overwrite=overwrite)

    # Remember where this config now lives.
    self.configFile = target
@staticmethod
def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True, toMerge=None):
    """Bulk-update selected parameters of a configuration.

    Named parameters can be assigned new values directly, copied from
    a reference configuration, or merged in from that reference
    configuration.

    The supplied ``config`` is assumed to be compatible with
    ``configType``; the modified values are attached back to it under
    the component key of ``configType`` when that key is present.
    ``config`` and ``full`` are assumed to come from the same level of
    the configuration hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config class used to extract the relevant part of ``config``
        and ``full``.
    config : `Config`
        The configuration to modify. Only the subset understood by
        ``configType`` is touched. The subset is constructed without
        merging defaults and without validation, since mandatory keys
        may legitimately be missing until a later merge.
    full : `Config`
        A complete configuration, with defaults expanded, that can be
        converted to ``configType``. It is only read from, and only if
        ``toCopy`` or ``toMerge`` is given.
    toUpdate : `dict`, optional
        Mapping of key to new value. Keys and values can be anything
        supported by assignment on the extracted config.
    toCopy : `tuple`, optional
        Keys whose values are copied from ``full`` into ``config``.
    overwrite : `bool`, optional
        If `False`, a key that already exists in ``config`` is left
        untouched by ``toUpdate`` and ``toCopy``. Defaults to `True`.
    toMerge : `tuple`, optional
        Keys whose content is merged from ``full`` into ``config``
        without replacing pre-existing values. Only meaningful for keys
        that refer to a hierarchy. ``overwrite`` is ignored for these.

    Raises
    ------
    ValueError
        Raised if ``toUpdate``, ``toCopy`` and ``toMerge`` are all `None`.
    """
    if all(request is None for request in (toUpdate, toCopy, toMerge)):
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    componentKey = configType.component

    # ``full`` is the authority on whether this is a parent configuration.
    # If it is, make sure ``config`` has at least a stub entry for the
    # component so that the subset constructor picks the right node.
    if componentKey in full and componentKey not in config:
        config[componentKey] = {}

    # The slice of ``config`` that is going to be modified.
    target = configType(config, mergeDefaults=False, validate=False)

    if toUpdate:
        for key, value in toUpdate.items():
            if overwrite or key not in target:
                target[key] = value
            else:
                log.debug("Not overriding key '%s' with value '%s' in config %s",
                          key, value, target.__class__.__name__)

    if toCopy or toMerge:
        # The matching slice of the reference configuration.
        reference = configType(full, mergeDefaults=False)

        if toCopy:
            for key in toCopy:
                if overwrite or key not in target:
                    target[key] = reference[key]
                else:
                    log.debug("Not overriding key '%s' from defaults in config %s",
                              key, target.__class__.__name__)

        if toMerge:
            for key in toMerge:
                if key not in target:
                    target[key] = reference[key]
                    continue
                # Pull the node out, merge into it, and then reattach
                # it to the config.
                node = target[key]
                node.merge(reference[key])
                target[key] = node

    # A child config is reattached under its component key; otherwise the
    # modified values are folded straight back into ``config``.
    if componentKey in config:
        config[componentKey] = target
    else:
        config.update(target)
def toDict(self):
    """Return the content as a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which every top-level value
        that is a `Config` has been replaced by the result of its own
        ``toDict()``.

    Notes
    -----
    Handy when handing a `Config` to code that only understands
    native Python types.
    """
    plain = copy.deepcopy(self._data)
    # Only existing keys are reassigned, so mutating during iteration
    # does not change the size of the mapping.
    for key in plain:
        value = plain[key]
        if isinstance(value, Config):
            plain[key] = value.toDict()
    return plain
class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are mandatory
    in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
        the local file system or URIs, `ButlerURI`.

    Raises
    ------
    RuntimeError
        Raised if defaults are being merged and the class named by the
        ``cls`` key can not be imported.
    KeyError
        Raised if ``validate`` is `True` and a required key is missing.
    """

    component: ClassVar[Optional[str]] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Create a blank object to receive the defaults
        # Once we have the defaults we then update with the external values
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name)
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:

            # Supplied search paths have highest priority
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            #   Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                defaultsFile = cls.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it. Not every
                # class defines one, in which case we keep `None`.
                containerKey = getattr(cls, "containerKey", None)

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                     mergeDefaults=mergeDefaults,
                                                     searchPaths=searchPaths)

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Read environment to determine search paths to use.

        Global defaults, at lowest priority, are found in the ``configs``
        resource directory of the package named by ``cls.resourcesPackage``.
        Additional defaults can be defined using the environment variable
        ``$DAF_BUTLER_CONFIG_PATH`` which is a PATH-like variable where
        paths at the front of the list have priority over those later.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. Entries from
            the environment variable come first as `str`; the package
            configuration resources are always the final entry, as a
            `ButlerURI`, so that they are searched last.

        Notes
        -----
        The environment variable is split on the platform path separator
        (`os.pathsep`). This currently makes it incompatible with usage
        of URIs. Empty entries, such as those produced by an empty
        variable or by doubled or trailing separators, are skipped.
        """
        # We can pick up defaults from multiple search paths.
        # Highest priority first: entries from the config path environment
        # variable, followed by the package defaults.
        defaultsPaths: List[Union[str, ButlerURI]] = []

        externalPaths = os.environ.get(CONFIG_PATH)
        if externalPaths:
            # Skip empty entries so that they are not treated as a
            # search path in their own right.
            defaultsPaths.extend(path for path in externalPaths.split(os.pathsep) if path)

        # Add the package defaults as a resource
        defaultsPaths.append(ButlerURI(f"resource://{cls.resourcesPackage}/configs",
                                       forceDirectory=True))
        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list` of `ButlerURI`, `str`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path. Can contain `str` referring to the local file
            system or a URI string.
        configFile : `ButlerURI` or `str`
            File to locate in path. If it is absolute and exists it is
            read directly and the search path is not used. Otherwise it
            is looked up relative to each entry of ``searchPaths``.

        Raises
        ------
        ValueError
            Raised if an entry in ``searchPaths`` is neither a `str` nor
            a `ButlerURI`.
        """
        uri = ButlerURI(configFile)
        if uri.isabs() and uri.exists():
            # An explicit resource that exists is read as-is.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                if isinstance(pathDir, (str, ButlerURI)):
                    pathDir = ButlerURI(pathDir, forceDirectory=True)
                    file = pathDir.join(configFile)
                    if file.exists():
                        self.filesRead.append(file)
                        self._updateWithOtherConfigFile(file)
                else:
                    raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")

    def _updateWithOtherConfigFile(self, file):
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, `ButlerURI`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self):
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any entry of ``requiredKeys`` is not a top-level
            key of this configuration.
        """
        # Validation
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")