Coverage for python/lsst/daf/butler/core/config.py: 44%
487 statements
coverage.py v6.5.0, created at 2023-02-28 10:37 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Configuration control."""
26__all__ = ("Config", "ConfigSubset")
28import copy
29import io
30import json
31import logging
32import os
33import pprint
34import sys
35from collections import defaultdict
36from collections.abc import Iterable, Mapping, MutableMapping, Sequence
37from pathlib import Path
38from typing import IO, TYPE_CHECKING, Any, ClassVar
40import yaml
41from lsst.resources import ResourcePath, ResourcePathExpression
42from lsst.utils import doImport
43from yaml.representer import Representer
45yaml.add_representer(defaultdict, Representer.represent_dict)
48# Config module logger
49log = logging.getLogger(__name__)
51# PATH-like environment variable to use for defaults.
52CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"
54if TYPE_CHECKING:  [54 ↛ 55: the condition on line 54 was never true]
55 yamlLoader = yaml.SafeLoader
56else:
57 try:
58 yamlLoader = yaml.CSafeLoader
59 except AttributeError:
60 # Not all installations have the C library
61 # (but assume for mypy's sake that they're the same)
62 yamlLoader = yaml.SafeLoader
65def _doUpdate(d, u):
66 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping):  [66 ↛ 67: the condition on line 66 was never true]
67 raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
68 for k, v in u.items():
69 if isinstance(v, Mapping):
70 d[k] = _doUpdate(d.get(k, {}), v)
71 else:
72 d[k] = v
73 return d
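# Editor's sketch (not part of the original module): _doUpdate merges nested mappings
# key by key instead of replacing the whole sub-dict, which is the behaviour that
# Config.update() relies on.
example = {"a": {"b": 1}}
_doUpdate(example, {"a": {"c": 2}, "x": 3})
assert example == {"a": {"b": 1, "c": 2}, "x": 3}  # a plain dict.update() would have dropped "b"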
76def _checkNextItem(k, d, create, must_be_dict):
77 """See if k is in d and if it is return the new child."""
78 nextVal = None
79 isThere = False
80 if d is None:  [80 ↛ 82: the condition on line 80 was never true]
81 # We have gone past the end of the hierarchy
82 pass
83 elif not must_be_dict and isinstance(d, Sequence):  [83 ↛ 88: the condition on line 83 was never true]
84 # Check for Sequence first because for lists
85 # __contains__ checks whether value is found in list
86 # not whether the index exists in list. When we traverse
87 # the hierarchy we are interested in the index.
88 try:
89 nextVal = d[int(k)]
90 isThere = True
91 except IndexError:
92 pass
93 except ValueError:
94 isThere = k in d
95 elif k in d:
96 nextVal = d[k]
97 isThere = True
98 elif create:  [98 ↛ 99: the condition on line 98 was never true]
99 d[k] = {}
100 nextVal = d[k]
101 isThere = True
103 return nextVal, isThere
106class Loader(yamlLoader):
107 """YAML Loader that supports file include directives.
109 Uses ``!include`` directive in a YAML file to point to another
110 YAML file to be included. The path in the include directive is relative
111 to the file containing that directive.
113 storageClasses: !include storageClasses.yaml
115 Examples
116 --------
117 >>> with open("document.yaml", "r") as f:
118 ...     data = yaml.load(f, Loader=Loader)
120 Notes
121 -----
122 See https://davidchall.github.io/yaml-includes.html
123 """
125 def __init__(self, stream: Any): # types-PyYAML annotates 'stream' with a private type
126 super().__init__(stream)
127 # if this is a string and not a stream we may well lack a name
128 try:
129 self._root = ResourcePath(stream.name)
130 except AttributeError:
131 # No choice but to assume a local filesystem
132 self._root = ResourcePath("no-file.yaml")
133 Loader.add_constructor("!include", Loader.include)
135 def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
136 result: list[Any] | dict[str, Any]
137 if isinstance(node, yaml.ScalarNode):
138 return self.extractFile(self.construct_scalar(node))
140 elif isinstance(node, yaml.SequenceNode):
141 result = []
142 for filename in self.construct_sequence(node):
143 result.append(self.extractFile(filename))
144 return result
146 elif isinstance(node, yaml.MappingNode):
147 result = {}
148 for k, v in self.construct_mapping(node).items():
149 if not isinstance(k, str):
150 raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
151 result[k] = self.extractFile(v)
152 return result
154 else:
155 print("Error: unrecognised node type in !include statement", file=sys.stderr)
156 raise yaml.constructor.ConstructorError
158 def extractFile(self, filename):
159 # It is possible for the !include to point to an explicit URI
160 # instead of a relative URI, therefore we first see if it is
161 # scheme-less or not. If it has a scheme we use it directly
162 # if it is scheme-less we use it relative to the file root.
163 requesteduri = ResourcePath(filename, forceAbsolute=False)
165 if requesteduri.scheme:
166 fileuri = requesteduri
167 else:
168 fileuri = self._root.updatedFile(filename)
170 log.debug("Opening YAML file via !include: %s", fileuri)
172 # Read all the data from the resource
173 data = fileuri.read()
175 # Store the bytes into a BytesIO so we can attach a .name
176 stream = io.BytesIO(data)
177 stream.name = fileuri.geturl()
178 return yaml.load(stream, Loader)
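# Editor's sketch (the file names are hypothetical): the custom Loader resolves an
# ``!include`` path relative to the file containing the directive. If base.yaml holds
# the line ``storageClasses: !include storageClasses.yaml``, loading it with this
# Loader pulls in the sibling file:
with open("base.yaml") as f:
    data = yaml.load(f, Loader=Loader)
# data["storageClasses"] now holds the mapping read from storageClasses.yaml.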
181class Config(MutableMapping):
182 r"""Implements a datatype that is used by `Butler` for configuration.
184 It is essentially a `dict` with key/value pairs, including nested dicts
185 (as values). In fact, it can be initialized with a `dict`.
186 This is explained next:
188 Config extends the `dict` api so that hierarchical values may be accessed
189 with delimited notation or as a tuple. If a string is given the delimiter
190 is picked up from the first character in that string. For example,
191 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
192 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
193 If the first character is alphanumeric, no delimiter will be used.
194 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
195 Unicode characters can be used as the delimiter for distinctiveness if
196 required.
198 If a key in the hierarchy starts with a non-alphanumeric character care
199 should be used to ensure that either the tuple interface is used or
200 a distinct delimiter is always given in string form.
202 Finally, the delimiter can be escaped if it is part of a key and also
203 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
204 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
205 always better to use a different delimiter in these cases.
207 Note that adding a multi-level key implicitly creates any nesting levels
208 that do not exist, but removing multi-level keys does not automatically
209 remove empty nesting levels. As a result:
211 >>> c = Config()
212 >>> c[".a.b"] = 1
213 >>> del c[".a.b"]
214 >>> c
215 Config({'a': {}})
217 Storage formats supported:
219 - yaml: read and write is supported.
220 - json: read and write is supported but no ``!include`` directive.
222 Parameters
223 ----------
224 other : `lsst.resources.ResourcePath` or `Config` or `dict`
225 Other source of configuration, can be:
227 - (`lsst.resources.ResourcePathExpression`)
228 Treated as a URI to a config file. Must end with ".yaml" or ".json".
229 - (`Config`) Copies the other Config's values into this one.
230 - (`dict`) Copies the values from the dict into this Config.
232 If `None` is provided an empty `Config` will be created.
233 """
235 _D: str = "→"
236 """Default internal delimiter to use for components in the hierarchy when
237 constructing keys for external use (see `Config.names()`)."""
239 includeKey: ClassVar[str] = "includeConfigs"
240 """Key used to indicate that another config should be included at this
241 part of the hierarchy."""
243 resourcesPackage: str = "lsst.daf.butler"
244 """Package to search for default configuration data. The resources
245 themselves will be within a ``configs`` resource hierarchy."""
247 def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
248 self._data: dict[str, Any] = {}
249 self.configFile: ResourcePath | None = None
251 if other is None:
252 return
254 if isinstance(other, Config):
255 # Deep copy might be more efficient but if someone has overridden
256 # a config entry to store a complex object then deep copy may
257 # fail. Safer to use update().
258 self.update(other._data)
259 self.configFile = other.configFile
260 elif isinstance(other, (dict, Mapping)):
261 # In most cases we have a dict, and it's more efficient
262 # to check for a dict instance before checking the generic mapping.
263 self.update(other)
264 elif isinstance(other, (str, ResourcePath, Path)):  [264 ↛ 271: the condition on line 264 was never false]
265 # if other is a string, assume it is a file path/URI
266 self.__initFromUri(other)
267 self._processExplicitIncludes()
268 else:
269 # if the config specified by other could not be recognized raise
270 # a runtime error.
271 raise RuntimeError(f"A Config could not be loaded from other: {other}")
273 def ppprint(self):
274 """Return config as formatted readable string.
276 Examples
277 --------
278 use: ``pdb> print(myConfigObject.ppprint())``
280 Returns
281 -------
282 s : `str`
283 A prettyprint formatted string representing the config
284 """
285 return pprint.pformat(self._data, indent=2, width=1)
287 def __repr__(self):
288 return f"{type(self).__name__}({self._data!r})"
290 def __str__(self):
291 return self.ppprint()
293 def __len__(self):
294 return len(self._data)
296 def __iter__(self):
297 return iter(self._data)
299 def copy(self):
300 return type(self)(self)
302 @classmethod
303 def fromString(cls, string: str, format: str = "yaml") -> Config:
304 """Create a new Config instance from a serialized string.
306 Parameters
307 ----------
308 string : `str`
309 String containing content in specified format
310 format : `str`, optional
311 Format of the supplied string. Can be ``json`` or ``yaml``.
313 Returns
314 -------
315 c : `Config`
316 Newly-constructed Config.
317 """
318 if format == "yaml":
319 new_config = cls().__initFromYaml(string)
320 elif format == "json":
321 new_config = cls().__initFromJson(string)
322 else:
323 raise ValueError(f"Unexpected format of string: {format}")
324 new_config._processExplicitIncludes()
325 return new_config
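# Editor's sketch: fromString accepts both supported serializations and yields the same
# hierarchical Config.
c_yaml = Config.fromString("a:\n  b: 1\n", format="yaml")
c_json = Config.fromString('{"a": {"b": 1}}', format="json")
assert c_yaml["a", "b"] == c_json["a", "b"] == 1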
327 @classmethod
328 def fromYaml(cls, string: str) -> Config:
329 """Create a new Config instance from a YAML string.
331 Parameters
332 ----------
333 string : `str`
334 String containing content in YAML format
336 Returns
337 -------
338 c : `Config`
339 Newly-constructed Config.
340 """
341 return cls.fromString(string, format="yaml")
343 def __initFromUri(self, path: ResourcePathExpression) -> None:
344 """Load a file from a path or a URI.
346 Parameters
347 ----------
348 path : `lsst.resources.ResourcePathExpression`
349 Path or a URI to a persisted config file.
350 """
351 uri = ResourcePath(path)
352 ext = uri.getExtension()
353 if ext == ".yaml":  [353 ↛ 360: the condition on line 353 was never false]
354 log.debug("Opening YAML config file: %s", uri.geturl())
355 content = uri.read()
356 # Use a stream so we can name it
357 stream = io.BytesIO(content)
358 stream.name = uri.geturl()
359 self.__initFromYaml(stream)
360 elif ext == ".json":
361 log.debug("Opening JSON config file: %s", uri.geturl())
362 content = uri.read()
363 self.__initFromJson(content)
364 else:
365 # This URI does not have a valid extension. It might be because
366 # we ended up with a directory and not a file. Before we complain
367 # about an extension, do an existence check. No need to do
368 # the (possibly expensive) existence check in the default code
369 # path above because we will find out soon enough that the file
370 # is not there.
371 if not uri.exists():
372 raise FileNotFoundError(f"Config location {uri} does not exist.")
373 raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
374 self.configFile = uri
376 def __initFromYaml(self, stream):
377 """Load a YAML config from any readable stream that contains one.
379 Parameters
380 ----------
381 stream : `IO` or `str`
382 Stream to pass to the YAML loader. Accepts anything that
383 `yaml.load` accepts. This can include a string as well as an
384 IO stream.
386 Raises
387 ------
388 yaml.YAMLError
389 If there is an error loading the file.
390 """
391 content = yaml.load(stream, Loader=Loader)
392 if content is None:  [392 ↛ 393: the condition on line 392 was never true]
393 content = {}
394 self._data = content
395 return self
397 def __initFromJson(self, stream):
398 """Load a JSON config from any readable stream that contains one.
400 Parameters
401 ----------
402 stream : `IO` or `str`
403 Stream to pass to the JSON loader. This can include a string as
404 well as an IO stream.
406 Raises
407 ------
408 TypeError
409 Raised if there is an error loading the content.
410 """
411 if isinstance(stream, (bytes, str)):
412 content = json.loads(stream)
413 else:
414 content = json.load(stream)
415 if content is None:
416 content = {}
417 self._data = content
418 return self
420 def _processExplicitIncludes(self):
421 """Scan through the configuration searching for the special includes.
423 Looks for ``includeConfigs`` directive and processes the includes.
424 """
425 # Search paths for config files
426 searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
427 if self.configFile is not None:  [427 ↛ 435: the condition on line 427 was never false]
428 if isinstance(self.configFile, ResourcePath):  [428 ↛ 431: the condition on line 428 was never false]
429 configDir = self.configFile.dirname()
430 else:
431 raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
432 searchPaths.append(configDir)
434 # Ensure we know what delimiter to use
435 names = self.nameTuples()
436 for path in names:
437 if path[-1] == self.includeKey:  [437 ↛ 438: the condition on line 437 was never true]
438 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
439 basePath = path[:-1]
441 # Extract the includes and then delete them from the config
442 includes = self[path]
443 del self[path]
445 # Be consistent and convert to a list
446 if not isinstance(includes, list):
447 includes = [includes]
449 # Read each file assuming it is a reference to a file
450 # The file can be relative to config file or cwd
451 # ConfigSubset search paths are not used
452 subConfigs = []
453 for fileName in includes:
454 # Expand any shell variables -- this could be URI
455 fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
456 found = None
457 if fileName.isabs():
458 found = fileName
459 else:
460 for dir in searchPaths:
461 if isinstance(dir, ResourcePath):
462 specific = dir.join(fileName.path)
463 # Remote resource check might be expensive
464 if specific.exists():
465 found = specific
466 else:
467 log.warning(
468 "Do not understand search path entry '%s' of type %s",
469 dir,
470 type(dir).__name__,
471 )
472 if not found:
473 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
475 # Read the referenced Config as a Config
476 subConfigs.append(type(self)(found))
478 # Now we need to merge these sub configs with the current
479 # information that was present in this node in the config
480 # tree with precedence given to the explicit values
481 newConfig = subConfigs.pop(0)
482 for sc in subConfigs:
483 newConfig.update(sc)
485 # Explicit values take precedence
486 if not basePath:
487 # This is an include at the root config
488 newConfig.update(self)
489 # Replace the current config
490 self._data = newConfig._data
491 else:
492 newConfig.update(self[basePath])
493 # And reattach to the base config
494 self[basePath] = newConfig
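# Editor's sketch (file names hypothetical): an ``includeConfigs`` entry is replaced by
# the contents of the referenced file(s), resolved relative to the including config
# file or the current working directory; values already present at that node win over
# the included ones. Given a datastore.yaml containing:
#
#   datastore:
#     includeConfigs: formatters.yaml
#     root: /data/repo        # kept even if formatters.yaml also defines "root"
#
merged = Config("datastore.yaml")  # the include is expanded under "datastore"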
496 @staticmethod
497 def _splitIntoKeys(key):
498 r"""Split the argument for get/set/in into a hierarchical list.
500 Parameters
501 ----------
502 key : `str` or iterable
503 Argument given to get/set/in. If an iterable is provided it will
504 be converted to a list. If the first character of the string
505 is not an alphanumeric character then it will be used as the
506 delimiter for the purposes of splitting the remainder of the
507 string. If the delimiter is also in one of the keys then it
508 can be escaped using ``\``. There is no default delimiter.
510 Returns
511 -------
512 keys : `list`
513 Hierarchical keys as a `list`.
514 """
515 if isinstance(key, str):
516 if not key[0].isalnum():  [516 ↛ 517: the condition on line 516 was never true]
517 d = key[0]
518 key = key[1:]
519 else:
520 return [
521 key,
522 ]
523 escaped = f"\\{d}"
524 temp = None
525 if escaped in key:
526 # Complain at the attempt to escape the escape
527 doubled = rf"\{escaped}"
528 if doubled in key:
529 raise ValueError(
530 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
531 )
532 # Replace with a character that won't be in the string
533 temp = "\r"
534 if temp in key or d == temp:
535 raise ValueError(
536 f"Can not use character {temp!r} in hierarchical key or as"
537 " delimiter if escaping the delimiter"
538 )
539 key = key.replace(escaped, temp)
540 hierarchy = key.split(d)
541 if temp:
542 hierarchy = [h.replace(temp, d) for h in hierarchy]
543 return hierarchy
544 elif isinstance(key, Iterable):  [544 ↛ 548: the condition on line 544 was never false]
545 return list(key)
546 else:
547 # Not sure what this is so try it anyway
548 return [
549 key,
550 ]
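# Editor's sketch: the first non-alphanumeric character selects the delimiter, and the
# delimiter can be escaped inside a key with a backslash.
assert Config._splitIntoKeys(".a.b.c") == ["a", "b", "c"]
assert Config._splitIntoKeys("/a/b.c") == ["a", "b.c"]
assert Config._splitIntoKeys(r".a.b\.c") == ["a", "b.c"]  # escaped delimiter stays in the key
assert Config._splitIntoKeys("a.b.c") == ["a.b.c"]  # alphanumeric start: no delimiter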
552 def _getKeyHierarchy(self, name):
553 """Retrieve the key hierarchy for accessing the Config.
555 Parameters
556 ----------
557 name : `str` or `tuple`
558 Delimited string or `tuple` of hierarchical keys.
560 Returns
561 -------
562 hierarchy : `list` of `str`
563 Hierarchy to use as a `list`. If the name is available directly
564 as a key in the Config it will be used regardless of the presence
565 of any nominal delimiter.
566 """
567 if name in self._data:
568 keys = [
569 name,
570 ]
571 else:
572 keys = self._splitIntoKeys(name)
573 return keys
575 def _findInHierarchy(self, keys, create=False):
576 """Look for hierarchy of keys in Config.
578 Parameters
579 ----------
580 keys : `list` or `tuple`
581 Keys to search in hierarchy.
582 create : `bool`, optional
583 If `True`, if a part of the hierarchy does not exist, insert an
584 empty `dict` into the hierarchy.
586 Returns
587 -------
588 hierarchy : `list`
589 List of the value corresponding to each key in the supplied
590 hierarchy. Only keys that exist in the hierarchy will have
591 a value.
592 complete : `bool`
593 `True` if the full hierarchy exists and the final element
594 in ``hierarchy`` is the relevant value.
595 """
596 d = self._data
598 # For the first key, d must be a dict so it is a waste
599 # of time to check for a sequence.
600 must_be_dict = True
602 hierarchy = []
603 complete = True
604 for k in keys:
605 d, isThere = _checkNextItem(k, d, create, must_be_dict)
606 if isThere:
607 hierarchy.append(d)
608 else:
609 complete = False
610 break
611 # Second time round it might be a sequence.
612 must_be_dict = False
614 return hierarchy, complete
616 def __getitem__(self, name):
617 # Override the split for the simple case where there is an exact
618 # match. This allows `Config.items()` to work via a simple
619 # __iter__ implementation that returns top level keys of
620 # self._data.
622 # If the name matches a key in the top-level hierarchy, bypass
623 # all further cleverness.
624 found_directly = False
625 try:
626 data = self._data[name]
627 found_directly = True
628 except KeyError:
629 pass
631 if not found_directly:  [631 ↛ 632: the condition on line 631 was never true]
632 keys = self._getKeyHierarchy(name)
634 hierarchy, complete = self._findInHierarchy(keys)
635 if not complete:
636 raise KeyError(f"{name} not found")
637 data = hierarchy[-1]
639 # In most cases we have a dict, and it's more efficient
640 # to check for a dict instance before checking the generic mapping.
641 if isinstance(data, (dict, Mapping)):
642 data = Config(data)
643 # Ensure that child configs inherit the parent internal delimiter
644 if self._D != Config._D:  [644 ↛ 645: the condition on line 644 was never true]
645 data._D = self._D
646 return data
648 def __setitem__(self, name, value):
649 keys = self._getKeyHierarchy(name)
650 last = keys.pop()
651 if isinstance(value, Config):
652 value = copy.deepcopy(value._data)
654 hierarchy, complete = self._findInHierarchy(keys, create=True)
655 if hierarchy:
656 data = hierarchy[-1]
657 else:
658 data = self._data
660 try:
661 data[last] = value
662 except TypeError:
663 data[int(last)] = value
665 def __contains__(self, key):
666 keys = self._getKeyHierarchy(key)
667 hierarchy, complete = self._findInHierarchy(keys)
668 return complete
670 def __delitem__(self, key):
671 keys = self._getKeyHierarchy(key)
672 last = keys.pop()
673 hierarchy, complete = self._findInHierarchy(keys)
674 if complete:  [674 ↛ 681: the condition on line 674 was never false]
675 if hierarchy:  [675 ↛ 676: the condition on line 675 was never true]
676 data = hierarchy[-1]
677 else:
678 data = self._data
679 del data[last]
680 else:
681 raise KeyError(f"{key} not found in Config")
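# Editor's sketch: hierarchical access works with delimited strings or tuples of keys,
# and setting a multi-level key creates the intermediate levels on demand.
c = Config()
c[".datastore.root"] = "/data/repo"
assert ("datastore", "root") in c
assert c["datastore"]["root"] == c[".datastore.root"] == "/data/repo"
del c[".datastore.root"]
assert "datastore" in c  # deletion leaves the now-empty parent node behind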
683 def update(self, other):
684 """Update config from other `Config` or `dict`.
686 Like `dict.update()`, but will add or modify keys in nested dicts,
687 instead of overwriting the nested dict entirely.
689 Parameters
690 ----------
691 other : `dict` or `Config`
692 Source of configuration:
694 Examples
695 --------
696 >>> c = Config({"a": {"b": 1}})
697 >>> c.update({"a": {"c": 2}})
698 >>> print(c)
699 {'a': {'b': 1, 'c': 2}}
701 >>> foo = {"a": {"b": 1}}
702 >>> foo.update({"a": {"c": 2}})
703 >>> print(foo)
704 {'a': {'c': 2}}
705 """
706 _doUpdate(self._data, other)
708 def merge(self, other):
709 """Merge another Config into this one.
711 Like `Config.update()`, but will add keys & values from other that
712 DO NOT EXIST in self.
714 Keys and values that already exist in self will NOT be overwritten.
716 Parameters
717 ----------
718 other : `dict` or `Config`
719 Source of configuration:
720 """
721 if not isinstance(other, Mapping):
722 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
724 # Convert the supplied mapping to a Config for consistency
725 # This will do a deepcopy if it is already a Config
726 otherCopy = Config(other)
727 otherCopy.update(self)
728 self._data = otherCopy._data
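# Editor's sketch: update() lets the supplied values win, merge() keeps existing values
# and only fills in keys that are missing.
c = Config({"a": {"b": 1}})
c.update({"a": {"b": 99, "c": 2}})
assert c["a", "b"] == 99 and c["a", "c"] == 2
c.merge({"a": {"b": 0, "d": 3}})
assert c["a", "b"] == 99 and c["a", "d"] == 3  # existing "b" survives, new "d" is added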
730 def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
731 """Get tuples representing the name hierarchies of all keys.
733 The tuples returned from this method are guaranteed to be usable
734 to access items in the configuration object.
736 Parameters
737 ----------
738 topLevelOnly : `bool`, optional
739 If False, the default, a full hierarchy of names is returned.
740 If True, only the top level are returned.
742 Returns
743 -------
744 names : `list` of `tuple` of `str`
745 List of all names present in the `Config` where each element
746 in the list is a `tuple` of strings representing the hierarchy.
747 """
748 if topLevelOnly:  [748 ↛ 749: the condition on line 748 was never true]
749 return list((k,) for k in self)
751 def getKeysAsTuples(
752 d: Mapping[str, Any] | Sequence[str], keys: list[tuple[str, ...]], base: tuple[str, ...] | None
753 ) -> None:
754 if isinstance(d, Sequence):
755 theseKeys: Iterable[Any] = range(len(d))
756 else:
757 theseKeys = d.keys()
758 for key in theseKeys:
759 val = d[key]
760 levelKey = base + (key,) if base is not None else (key,)
761 keys.append(levelKey)
762 if isinstance(val, (Mapping, Sequence)) and not isinstance(val, str):
763 getKeysAsTuples(val, keys, levelKey)
765 keys: list[tuple[str, ...]] = []
766 getKeysAsTuples(self._data, keys, None)
767 return keys
769 def names(self, topLevelOnly=False, delimiter=None):
770 """Get a delimited name of all the keys in the hierarchy.
772 The values returned from this method are guaranteed to be usable
773 to access items in the configuration object.
775 Parameters
776 ----------
777 topLevelOnly : `bool`, optional
778 If False, the default, a full hierarchy of names is returned.
779 If True, only the top level are returned.
780 delimiter : `str`, optional
781 Delimiter to use when forming the keys. If the delimiter is
782 present in any of the keys, it will be escaped in the returned
783 names. If `None` given a delimiter will be automatically provided.
784 The delimiter can not be alphanumeric.
786 Returns
787 -------
788 names : `list` of `str`
789 List of all names present in the `Config`.
791 Notes
792 -----
793 This is different than the built-in method `dict.keys`, which will
794 return only the first level keys.
796 Raises
797 ------
798 ValueError
799 The supplied delimiter is alphanumeric.
800 """
801 if topLevelOnly:
802 return list(self.keys())
804 # Get all the tuples of hierarchical keys
805 nameTuples = self.nameTuples()
807 if delimiter is not None and delimiter.isalnum():
808 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
810 if delimiter is None:
811 # Start with something, and ensure it does not need to be
812 # escaped (it is much easier to understand if not escaped)
813 delimiter = self._D
815 # Form big string for easy check of delimiter clash
816 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
818 # Try a delimiter and keep trying until we get something that
819 # works.
820 ntries = 0
821 while delimiter in combined:
822 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
823 ntries += 1
825 if ntries > 100:
826 raise ValueError(f"Unable to determine a delimiter for Config {self}")
828 # try another one
829 while True:
830 delimiter = chr(ord(delimiter) + 1)
831 if not delimiter.isalnum():
832 break
834 log.debug("Using delimiter %r", delimiter)
836 # Form the keys, escaping the delimiter if necessary
837 strings = [
838 delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
839 for k in nameTuples
840 ]
841 return strings
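# Editor's sketch: every level of the hierarchy is reported, and each returned name can
# be used directly as a key.
c = Config({"a": {"b": 1, "c": {"d": 2}}})
assert c.nameTuples() == [("a",), ("a", "b"), ("a", "c"), ("a", "c", "d")]
assert c.names(delimiter=".") == [".a", ".a.b", ".a.c", ".a.c.d"]
assert all(name in c for name in c.names(delimiter="."))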
843 def asArray(self, name):
844 """Get a value as an array.
846 May contain one or more elements.
848 Parameters
849 ----------
850 name : `str`
851 Key to use to retrieve value.
853 Returns
854 -------
855 array : `collections.abc.Sequence`
856 The value corresponding to name, but guaranteed to be returned
857 as a list with at least one element. If the value is a
858 `~collections.abc.Sequence` (and not a `str`) the value itself
859 will be returned, else the value will be the first element.
860 """
861 val = self.get(name)
862 if isinstance(val, str):
863 val = [val]
864 elif not isinstance(val, Sequence):
865 val = [val]
866 return val
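# Editor's sketch: asArray() normalizes values that may be stored either as a scalar or
# as a list.
c = Config({"format": "yaml", "paths": ["/a", "/b"]})
assert c.asArray("format") == ["yaml"]
assert c.asArray("paths") == ["/a", "/b"]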
868 def __eq__(self, other):
869 if isinstance(other, Config):
870 other = other._data
871 return self._data == other
873 def __ne__(self, other):
874 if isinstance(other, Config):
875 other = other._data
876 return self._data != other
878 #######
879 # i/o #
881 def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
882 """Write the config to an output stream.
884 Parameters
885 ----------
886 output : `IO`, optional
887 The stream to use for output. If `None` the serialized content
888 will be returned.
889 format : `str`, optional
890 The format to use for the output. Can be "yaml" or "json".
892 Returns
893 -------
894 serialized : `str` or `None`
895 If a stream was given the stream will be used and the return
896 value will be `None`. If the stream was `None` the
897 serialization will be returned as a string.
898 """
899 if format == "yaml":
900 return yaml.safe_dump(self._data, output, default_flow_style=False)
901 elif format == "json":
902 if output is not None:
903 json.dump(self._data, output, ensure_ascii=False)
904 return None
905 else:
906 return json.dumps(self._data, ensure_ascii=False)
907 raise ValueError(f"Unsupported format for Config serialization: {format}")
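# Editor's sketch (the output path is hypothetical): with no stream the serialization
# is returned as a string; with a stream it is written there and None is returned.
c = Config({"a": {"b": 1}})
yaml_text = c.dump(format="yaml")   # "a:\n  b: 1\n"
json_text = c.dump(format="json")   # '{"a": {"b": 1}}'
with open("config.json", "w") as fh:
    c.dump(output=fh, format="json")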
909 def dumpToUri(
910 self,
911 uri: ResourcePathExpression,
912 updateFile: bool = True,
913 defaultFileName: str = "butler.yaml",
914 overwrite: bool = True,
915 ) -> None:
916 """Write the config to location pointed to by given URI.
918 Currently supports 's3' and 'file' URI schemes.
920 Parameters
921 ----------
922 uri : `lsst.resources.ResourcePathExpression`
923 URI of location where the Config will be written.
924 updateFile : bool, optional
925 If True and uri does not end in a filename with an extension, will
926 append `defaultFileName` to the target uri. True by default.
927 defaultFileName : str, optional
928 The file name that will be appended to target uri if updateFile is
929 True and uri does not end in a file with an extension.
930 overwrite : bool, optional
931 If True the configuration will be written even if it already
932 exists at that location.
933 """
934 # Make local copy of URI or create new one
935 uri = ResourcePath(uri)
937 if updateFile and not uri.getExtension():
938 uri = uri.updatedFile(defaultFileName)
940 # Try to work out the format from the extension
941 ext = uri.getExtension()
942 format = ext[1:].lower()
944 output = self.dump(format=format)
945 assert output is not None, "Config.dump guarantees not-None return when output arg is None"
946 uri.write(output.encode(), overwrite=overwrite)
947 self.configFile = uri
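# Editor's sketch (target locations hypothetical and assumed writable): the extension
# of the URI selects the serialization format, and a directory-like URI has
# defaultFileName appended.
c = Config({"datastore": {"root": "/data/repo"}})
c.dumpToUri("/tmp/repo/")             # writes /tmp/repo/butler.yaml as YAML
c.dumpToUri("/tmp/repo/config.json")  # writes JSON because of the extension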
949 @staticmethod
950 def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True, toMerge=None):
951 """Update specific config parameters.
953 Allows for named parameters to be set to new values in bulk, and
954 for other values to be set by copying from a reference config.
956 Assumes that the supplied config is compatible with ``configType``
957 and will attach the updated values to the supplied config by
958 looking for the related component key. It is assumed that
959 ``config`` and ``full`` are from the same part of the
960 configuration hierarchy.
962 Parameters
963 ----------
964 configType : `ConfigSubset`
965 Config type to use to extract relevant items from ``config``.
966 config : `Config`
967 A `Config` to update. Only the subset understood by
968 the supplied `ConfigSubset` will be modified. Default values
969 will not be inserted and the content will not be validated
970 since mandatory keys are allowed to be missing until
971 populated later by merging.
972 full : `Config`
973 A complete config with all defaults expanded that can be
974 converted to a ``configType``. Read-only and will not be
975 modified by this method. Values are read from here if
976 ``toCopy`` is defined.
978 Repository-specific options that should not be obtained
979 from defaults when Butler instances are constructed
980 should be copied from ``full`` to ``config``.
981 toUpdate : `dict`, optional
982 A `dict` defining the keys to update and the new value to use.
983 The keys and values can be any supported by `Config`
984 assignment.
985 toCopy : `tuple`, optional
986 `tuple` of keys whose values should be copied from ``full``
987 into ``config``.
988 overwrite : `bool`, optional
989 If `False`, do not modify a value in ``config`` if the key
990 already exists. Default is always to overwrite.
991 toMerge : `tuple`, optional
992 Keys to merge content from full to config without overwriting
993 pre-existing values. Only works if the key refers to a hierarchy.
994 The ``overwrite`` flag is ignored.
996 Raises
997 ------
998 ValueError
999 Neither ``toUpdate``, ``toCopy`` nor ``toMerge`` were defined.
1000 """
1001 if toUpdate is None and toCopy is None and toMerge is None:
1002 raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")
1004 # If this is a parent configuration then we need to ensure that
1005 # the supplied config has the relevant component key in it.
1006 # If this is a parent configuration we add in the stub entry
1007 # so that the ConfigSubset constructor will do the right thing.
1008 # We check full for this since that is guaranteed to be complete.
1009 if configType.component in full and configType.component not in config:
1010 config[configType.component] = {}
1012 # Extract the part of the config we wish to update
1013 localConfig = configType(config, mergeDefaults=False, validate=False)
1015 if toUpdate:
1016 for key, value in toUpdate.items():
1017 if key in localConfig and not overwrite:
1018 log.debug(
1019 "Not overriding key '%s' with value '%s' in config %s",
1020 key,
1021 value,
1022 localConfig.__class__.__name__,
1023 )
1024 else:
1025 localConfig[key] = value
1027 if toCopy or toMerge:
1028 localFullConfig = configType(full, mergeDefaults=False)
1030 if toCopy:
1031 for key in toCopy:
1032 if key in localConfig and not overwrite:
1033 log.debug(
1034 "Not overriding key '%s' from defaults in config %s",
1035 key,
1036 localConfig.__class__.__name__,
1037 )
1038 else:
1039 localConfig[key] = localFullConfig[key]
1040 if toMerge:
1041 for key in toMerge:
1042 if key in localConfig:
1043 # Get the node from the config to do the merge
1044 # but then have to reattach to the config.
1045 subset = localConfig[key]
1046 subset.merge(localFullConfig[key])
1047 localConfig[key] = subset
1048 else:
1049 localConfig[key] = localFullConfig[key]
1051 # Reattach to parent if this is a child config
1052 if configType.component in config:
1053 config[configType.component] = localConfig
1054 else:
1055 config.update(localConfig)
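# Editor's sketch (SubsetConfig is a hypothetical subclass of the ConfigSubset class
# defined further down in this module): selected values land in the component section
# of a sparse per-repository config, either set explicitly (toUpdate) or copied from
# the fully defaulted config (toCopy).
class SubsetConfig(ConfigSubset):
    component = "subset"

full = Config({"subset": {"root": "<butlerRoot>", "create": True}})
config = Config()
Config.updateParameters(SubsetConfig, config, full,
                        toUpdate={"root": "/data/repo"}, toCopy=("create",))
assert config.toDict() == {"subset": {"root": "/data/repo", "create": True}}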
1057 def toDict(self):
1058 """Convert a `Config` to a standalone hierarchical `dict`.
1060 Returns
1061 -------
1062 d : `dict`
1063 The standalone hierarchical `dict` with any `Config` classes
1064 in the hierarchy converted to `dict`.
1066 Notes
1067 -----
1068 This can be useful when passing a Config to some code that
1069 expects native Python types.
1070 """
1071 output = copy.deepcopy(self._data)
1072 for k, v in output.items():
1073 if isinstance(v, Config):  [1073 ↛ 1074: the condition on line 1073 was never true]
1074 v = v.toDict()
1075 output[k] = v
1076 return output
1079class ConfigSubset(Config):
1080 """Config representing a subset of a more general configuration.
1082 Subclasses define their own component and when given a configuration
1083 that includes that component, the resulting configuration only includes
1084 the subset. For example, your config might contain ``dimensions`` if it's
1085 part of a global config and that subset will be stored. If ``dimensions``
1086 can not be found it is assumed that the entire contents of the
1087 configuration should be used.
1089 Default values are read from the environment or supplied search paths
1090 using the default configuration file name specified in the subclass.
1091 This allows a configuration class to be instantiated without any
1092 additional arguments.
1094 Additional validation can be specified to check for keys that are mandatory
1095 in the configuration.
1097 Parameters
1098 ----------
1099 other : `Config` or `str` or `dict`
1100 Argument specifying the configuration information as understood
1101 by `Config`
1102 validate : `bool`, optional
1103 If `True` required keys will be checked to ensure configuration
1104 consistency.
1105 mergeDefaults : `bool`, optional
1106 If `True` defaults will be read and the supplied config will
1107 be combined with the defaults, with the supplied values taking
1108 precedence.
1109 searchPaths : `list` or `tuple`, optional
1110 Explicit additional paths to search for defaults. They should
1111 be supplied in priority order. These paths have higher priority
1112 than those read from the environment in
1113 `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
1114 the local file system or URIs, `lsst.resources.ResourcePath`.
1115 """
1117 component: ClassVar[str | None] = None
1118 """Component to use from supplied config. Can be None. If specified the
1119 key is not required. Can be a full dot-separated path to a component.
1120 """
1122 requiredKeys: ClassVar[Sequence[str]] = ()
1123 """Keys that are required to be specified in the configuration.
1124 """
1126 defaultConfigFile: ClassVar[str | None] = None
1127 """Name of the file containing defaults for this config class.
1128 """
1130 def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):
1131 # Create a blank object to receive the defaults
1132 # Once we have the defaults we then update with the external values
1133 super().__init__()
1135 # Create a standard Config rather than subset
1136 externalConfig = Config(other)
1138 # Select the part we need from it
1139 # To simplify the use of !include we also check for the existence of
1140 # component.component (since the included files can themselves
1141 # include the component name)
1142 if self.component is not None:  [1142 ↛ 1151: the condition on line 1142 was never false]
1143 doubled = (self.component, self.component)
1144 # Must check for double depth first
1145 if doubled in externalConfig:  [1145 ↛ 1146: the condition on line 1145 was never true]
1146 externalConfig = externalConfig[doubled]
1147 elif self.component in externalConfig:
1148 externalConfig._data = externalConfig._data[self.component]
1150 # Default files read to create this configuration
1151 self.filesRead = []
1153 # Assume we are not looking up child configurations
1154 containerKey = None
1156 # Sometimes we do not want to merge with defaults.
1157 if mergeDefaults:
1158 # Supplied search paths have highest priority
1159 fullSearchPath = []
1160 if searchPaths:  [1160 ↛ 1161: the condition on line 1160 was never true]
1161 fullSearchPath.extend(searchPaths)
1163 # Read default paths from environment
1164 fullSearchPath.extend(self.defaultSearchPaths())
1166 # There are two places to find defaults for this particular config
1167 # - The "defaultConfigFile" defined in the subclass
1168 # - The class specified in the "cls" element in the config.
1169 # Read cls after merging in case it changes.
1170 if self.defaultConfigFile is not None:  [1170 ↛ 1175: the condition on line 1170 was never false]
1171 self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)
1173 # Can have a class specification in the external config (priority)
1174 # or from the defaults.
1175 pytype = None
1176 if "cls" in externalConfig:  [1176 ↛ 1177: the condition on line 1176 was never true]
1177 pytype = externalConfig["cls"]
1178 elif "cls" in self:  [1178 ↛ 1179: the condition on line 1178 was never true]
1179 pytype = self["cls"]
1181 if pytype is not None:  [1181 ↛ 1182: the condition on line 1181 was never true]
1182 try:
1183 cls = doImport(pytype)
1184 except ImportError as e:
1185 raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
1186 defaultsFile = cls.defaultConfigFile
1187 if defaultsFile is not None:
1188 self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)
1190 # Get the container key in case we need it
1191 try:
1192 containerKey = cls.containerKey
1193 except AttributeError:
1194 pass
1196 # Now update this object with the external values so that the external
1197 # values always override the defaults
1198 self.update(externalConfig)
1199 if not self.configFile:  [1199 ↛ 1205: the condition on line 1199 was never false]
1200 self.configFile = externalConfig.configFile
1202 # If this configuration has child configurations of the same
1203 # config class, we need to expand those defaults as well.
1205 if mergeDefaults and containerKey is not None and containerKey in self:  [1205 ↛ 1206: the condition on line 1205 was never true]
1206 for idx, subConfig in enumerate(self[containerKey]):
1207 self[containerKey, idx] = type(self)(
1208 other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
1209 )
1211 if validate:
1212 self.validate()
1214 @classmethod
1215 def defaultSearchPaths(cls) -> list[ResourcePath | str]:
1216 """Read environment to determine search paths to use.
1218 Global defaults, at lowest priority, are found in the ``config``
1219 directory of the butler source tree. Additional defaults can be
1220 defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
1221 which is a PATH-like variable where paths at the front of the list
1222 have priority over those later.
1224 Returns
1225 -------
1226 paths : `list`
1227 Returns a list of paths to search. The returned order is in
1228 priority with the highest priority paths first. The butler config
1229 configuration resources will not be included here but will
1230 always be searched last.
1232 Notes
1233 -----
1234 The environment variable is split on the standard ``:`` path separator.
1235 This currently makes it incompatible with usage of URIs.
1236 """
1237 # We can pick up defaults from multiple search paths
1238 # We fill defaults by using the butler config path and then
1239 # the config path environment variable in reverse order.
1240 defaultsPaths: list[str | ResourcePath] = []
1242 if CONFIG_PATH in os.environ:  [1242 ↛ 1243: the condition on line 1242 was never true]
1243 externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
1244 defaultsPaths.extend(externalPaths)
1246 # Add the package defaults as a resource
1247 defaultsPaths.append(ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
1248 return defaultsPaths
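# Editor's sketch (directory names hypothetical): with
#   export DAF_BUTLER_CONFIG_PATH=/etc/butler/site:/home/user/butler
# set in the environment, the two directories are searched in that order before the
# packaged defaults.
paths = ConfigSubset.defaultSearchPaths()
# ['/etc/butler/site', '/home/user/butler',
#  ResourcePath("resource://lsst.daf.butler/configs")]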
1250 def _updateWithConfigsFromPath(self, searchPaths, configFile):
1251 """Search the supplied paths, merging the configuration values.
1253 The values read will override values currently stored in the object.
1254 Every file found in the path will be read, such that the earlier
1255 path entries have higher priority.
1257 Parameters
1258 ----------
1259 searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
1260 Paths to search for the supplied configFile. This path
1261 is the priority order, such that files read from the
1262 first path entry will be selected over those read from
1263 a later path. Can contain `str` referring to the local file
1264 system or a URI string.
1265 configFile : `lsst.resources.ResourcePath`
1266 File to locate in path. If absolute path it will be read
1267 directly and the search path will not be used. Can be a URI
1268 to an explicit resource (which will ignore the search path)
1269 which is assumed to exist.
1270 """
1271 uri = ResourcePath(configFile)
1272 if uri.isabs() and uri.exists():  [1272 ↛ 1274: the condition on line 1272 was never true]
1273 # Assume this resource exists
1274 self._updateWithOtherConfigFile(configFile)
1275 self.filesRead.append(configFile)
1276 else:
1277 # Reverse order so that high priority entries
1278 # update the object last.
1279 for pathDir in reversed(searchPaths):
1280 if isinstance(pathDir, (str, ResourcePath)):  [1280 ↛ 1287: the condition on line 1280 was never false]
1281 pathDir = ResourcePath(pathDir, forceDirectory=True)
1282 file = pathDir.join(configFile)
1283 if file.exists():  [1283 ↛ 1279: the condition on line 1283 was never false]
1284 self.filesRead.append(file)
1285 self._updateWithOtherConfigFile(file)
1286 else:
1287 raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")
1289 def _updateWithOtherConfigFile(self, file):
1290 """Read in some defaults and update.
1292 Update the configuration by reading the supplied file as a config
1293 of this class, and merging such that these values override the
1294 current values. Contents of the external config are not validated.
1296 Parameters
1297 ----------
1298 file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
1299 Entity that can be converted to a `ConfigSubset`.
1300 """
1301 # Use this class to read the defaults so that subsetting can happen
1302 # correctly.
1303 externalConfig = type(self)(file, validate=False, mergeDefaults=False)
1304 self.update(externalConfig)
1306 def validate(self):
1307 """Check that mandatory keys are present in this configuration.
1309 Ignored if ``requiredKeys`` is empty.
1310 """
1311 # Validation
1312 missing = [k for k in self.requiredKeys if k not in self._data]
1313 if missing:  [1313 ↛ 1314: the condition on line 1313 was never true]
1314 raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")
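# Editor's sketch (class and file names hypothetical): a ConfigSubset subclass declares
# which component it extracts, which keys are mandatory, and where its packaged
# defaults live.
class DatastoreLikeConfig(ConfigSubset):
    component = "datastore"
    requiredKeys = ("root",)
    defaultConfigFile = "datastore.yaml"  # located via defaultSearchPaths() when merging defaults

c = DatastoreLikeConfig({"datastore": {"root": "/data/repo"}}, mergeDefaults=False)
assert c.toDict() == {"root": "/data/repo"}  # only the "datastore" subtree is kept
# Omitting "root" would raise KeyError from validate(); mergeDefaults=True would also
# read datastore.yaml from the search paths.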